Merge branch 'master' into remove-useless-setting-from-perf-test

alexey-milovidov 2020-07-14 09:31:34 +03:00 committed by GitHub
commit 969d9c0d47
114 changed files with 1919 additions and 692 deletions


@ -1,9 +1,9 @@
# These strings are autochanged from release_lib.sh:
SET(VERSION_REVISION 54436)
SET(VERSION_REVISION 54437)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 6)
SET(VERSION_MINOR 7)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH efc57fb063b3fb4df968d916720ec4d4ced4642e)
SET(VERSION_DESCRIBE v20.6.1.1-prestable)
SET(VERSION_STRING 20.6.1.1)
SET(VERSION_GITHASH d64e51d1a78c1b53c33915ca0f75c97b2333844f)
SET(VERSION_DESCRIBE v20.7.1.1-prestable)
SET(VERSION_STRING 20.7.1.1)
# end of autochange


@ -24,7 +24,7 @@ set (SRCS
add_library(amqp-cpp ${SRCS})
target_compile_options (amqp-cpp
PUBLIC
PRIVATE
-Wno-old-style-cast
-Wno-inconsistent-missing-destructor-override
-Wno-deprecated
@ -38,7 +38,7 @@ target_compile_options (amqp-cpp
-w
)
target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include)
target_include_directories (amqp-cpp SYSTEM PUBLIC ${LIBRARY_DIR}/include)
target_link_libraries (amqp-cpp PUBLIC ssl)

contrib/fmtlib

@ -1 +1 @@
Subproject commit 297c3b2ed551a4989826fc8c4780bf533e964bd9
Subproject commit c108ee1d590089ccf642fc85652b845924067af2

contrib/libhdfs3

@ -1 +1 @@
Subproject commit e2131aa752d7e95441e08f9a18304c1445f2576a
Subproject commit 1b666578c85094306b061352078022f6350bfab8

debian/changelog

@ -1,5 +1,5 @@
clickhouse (20.6.1.1) unstable; urgency=low
clickhouse (20.7.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 22 Jun 2020 20:40:23 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 13 Jul 2020 18:25:58 +0300


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.*
ARG version=20.7.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.*
ARG version=20.7.1.*
ARG gosu_ver=1.10
RUN apt-get update \


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.6.1.*
ARG version=20.7.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -0,0 +1,7 @@
<yandex>
<profiles>
<default>
<max_execution_time>10</max_execution_time>
</default>
</profiles>
</yandex>


@ -46,6 +46,7 @@ function configure
cp -av "$repo_dir"/programs/server/config* db
cp -av "$repo_dir"/programs/server/user* db
cp -av "$repo_dir"/tests/config db/config.d
cp -av "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
}
function watchdog
@ -53,12 +54,13 @@ function watchdog
sleep 3600
echo "Fuzzing run has timed out"
killall -9 clickhouse clickhouse-server clickhouse-client
./clickhouse client --query "select elapsed, query from system.processes" ||:
killall -9 clickhouse clickhouse-server clickhouse-client ||:
}
function fuzz
{
./clickhouse server --config-file db/config.xml -- --path db 2>&1 | tail -1000000 > server.log &
./clickhouse server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
@ -67,14 +69,15 @@ function fuzz
echo Server started
fuzzer_exit_code=0
./clickhouse client --query-fuzzer-runs=100 \
./clickhouse client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -1000000 > fuzzer.log) \
> >(tail -100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
kill -9 $server_pid
./clickhouse client --query "select elapsed, query from system.processes" ||:
kill -9 $server_pid ||:
return $fuzzer_exit_code
}
@ -107,7 +110,14 @@ case "$stage" in
watchdog_pid=$!
fuzzer_exit_code=0
time fuzz || fuzzer_exit_code=$?
kill $watchdog_pid
kill $watchdog_pid ||:
# Debug
date
sleep 10
jobs
pstree -aspgT
exit $fuzzer_exit_code
;&
esac


@ -282,6 +282,7 @@ do
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv"
sed -n "s/^short\t/$test_name\t/p" < "$test_file" >> "analyze/marked-short-queries.tsv"
sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv"
done
unset IFS
@ -291,6 +292,9 @@ clickhouse-local --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
-- Separately process 'partial' queries which we could only run on the new server
-- because they use new functions. We can't make normal stats for them, but still
-- have to show some stats so that the PR author can tweak them.
create view partial_queries as select test, query_index
from file('analyze/partial-queries.tsv', TSV,
'test text, query_index int, servers Array(int)');
@ -303,6 +307,7 @@ create table partial_query_times engine File(TSVWithNamesAndTypes,
group by test, query_index
;
-- Process queries that were run normally, on both servers.
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
@ -317,7 +322,10 @@ create view query_logs as
select *, 1 version from right_query_log
;
create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-full.tsv')
-- This is a single source of truth on all metrics we have for query runs. The
-- metrics include ProfileEvents from system.query_log, and query run times
-- reported by the perf.py test runner.
create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv')
as
with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because
@ -349,18 +357,29 @@ create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-
where (test, query_index) not in partial_queries
;
create table query_run_metrics engine File(
-- This is just for convenience -- human-readable + easy to make plots.
create table query_run_metrics_denorm engine File(TSV, 'analyze/query-run-metrics-denorm.tsv')
as select test, query_index, metric_names, version, query_id, metric_values
from query_run_metric_arrays
array join metric_names, metric_values
order by test, query_index, metric_names, version, query_id
;
-- This is for statistical processing with eqmed.sql
create table query_run_metrics_for_stats engine File(
TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv')
'analyze/query-run-metrics-for-stats.tsv')
as select test, query_index, 0 run, version, metric_values
from query_run_metrics_full
from query_run_metric_arrays
order by test, query_index, run, version
;
-- This is the list of metric names, so that we can join them back after
-- statistical processing.
create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv')
as select metric_names from query_run_metrics_full limit 1
as select metric_names from query_run_metric_arrays limit 1
;
"
" 2> >(tee -a analyze/errors.log 1>&2)
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
@ -370,16 +389,16 @@ create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-n
# for each file. I do this in parallel using GNU parallel.
( set +x # do not bloat the log
IFS=$'\n'
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
for prefix in $(cut -f1,2 "analyze/query-run-metrics-for-stats.tsv" | sort | uniq)
do
file="analyze/tmp/$(echo "$prefix" | sed 's/\t/_/g').tsv"
grep "^$prefix " "analyze/query-run-metrics.tsv" > "$file" &
grep "^$prefix " "analyze/query-run-metrics-for-stats.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-reports.tsv\"" \
>> \"analyze/query-metric-stats.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
done
@ -388,6 +407,33 @@ unset IFS
)
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select test, query_index, metric_name, left, right, diff, stat_threshold
from file('analyze/query-metric-stats.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
order by test, query_index, metric_name
;
create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
'analyze/query-metric-stats-denorm.tsv')
as select test, query_index, metric_name, left, right, diff, stat_threshold
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
order by test, query_index, metric_name
;
" 2> >(tee -a analyze/errors.log 1>&2)
}
# Analyze results
@ -403,58 +449,46 @@ build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
short_query_threshold="0.02"
clickhouse-local --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create view partial_query_times as select * from
file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median float')
;
-- Report for partial queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
as select floor(time_median, 3) m, floor(time_stddev / time_median, 3) v,
as select floor(time_median, 3) time,
floor(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name
from file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median float') t
from partial_query_times
join query_display_names using (test, query_index)
order by test, query_index
;
-- WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
left join query_display_names
on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index
;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv')
as
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
create view query_metric_stats as
select * from file('analyze/query-metric-stats-denorm.tsv',
TSVWithNamesAndTypes,
'test text, query_index int, metric_name text, left float, right float,
diff float, stat_threshold float')
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately (on the same order of time as noise). We compute average
-- run time between old and new version, and if it is below a threshold,
-- we just skip the query. If there is a significant regression, the
-- average will be above threshold, we'll process it normally and will
-- detect the regression.
(left + right) / 2 < 0.02 as short,
-- Comparison mode doesn't make sense for queries that complete
-- immediately (on the same order of time as noise). If query duration is
-- less than some threshold, we just skip it. If there is a significant
-- regression in such a query, the time will exceed the threshold, and we
-- will process it normally and detect the regression.
right < $short_query_threshold as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
not short and abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
@ -469,63 +503,22 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
where metric_name = 'server_time'
order by test, query_index, metric_name
;
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- save all test runs as JSON for the new comparison page
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query_index, query_display_name query,
left, right, diff, stat_threshold, report_threshold,
versions_runs[1] runs_left, versions_runs[2] runs_right
from (
select
test, query_index,
groupArrayInsertAt(runs, version) versions_runs
from (
select
test, query_index, version,
groupArray(metrics[1]) runs
from file('analyze/query-run-metrics.tsv', TSV,
'test text, query_index int, run int, version UInt8, metrics Array(float)')
group by test, query_index, version
)
group by test, query_index
) runs
left join query_display_names
on runs.test = query_display_names.test
and runs.query_index = query_display_names.query_index
left join file('analyze/report-thresholds.tsv',
TSV, 'test text, report_threshold float') thresholds
on runs.test = thresholds.test
left join query_metric_stats
on runs.test = query_metric_stats.test
and runs.query_index = query_metric_stats.query_index
where
query_metric_stats.metric_name = 'server_time'
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
create table test_time_changes engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
@ -536,22 +529,22 @@ create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.ts
)
;
create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as
create table unstable_tests engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
from queries
group by test
order by total_unstable + total_changed desc
;
create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
from test_time_changes_tsv
full join unstable_tests_tsv
from test_time_changes
full join unstable_tests
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
@ -559,28 +552,28 @@ create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.ts
settings join_use_nulls = 1
;
create table query_time engine Memory as select *
create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, test, query_display_name
from query_time left join query_display_names using (test, query_index)
from total_client_time_per_query left join query_display_names using (test, query_index)
where p > 1.02 order by p desc;
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries, sum(short) short_queries
from query_time full join queries using (test, query_index)
from total_client_time_per_query full join queries using (test, query_index)
group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
select wall_clock.test, real,
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select wall_clock_time_per_test.test, real,
floor(total_client_time, 3),
queries,
short_queries,
@ -590,23 +583,62 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
left join wall_clock_time_per_test using test
order by avg_real_per_query desc;
-- report for all queries page, only main metric
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from queries order by test, query_index;
-- queries for which we will build flamegraphs (see below)
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
-- List of queries that have 'short' duration, but are not marked as 'short' by
-- the test author (we report them).
create table unmarked_short_queries_report
engine File(TSV, 'report/unmarked-short-queries.tsv')
as select time, test, query_index, query_display_name
from (
select right time, test, query_index from queries where short
union all
select time_median, test, query_index from partial_query_times
where time_median < $short_query_threshold
) times
left join query_display_names
on times.test = query_display_names.test
and times.query_index = query_display_names.query_index
where (test, query_index) not in
(select * from file('analyze/marked-short-queries.tsv', TSV,
'test text, query_index int'))
order by test, query_index
;
--------------------------------------------------------------------------------
-- various compatibility data formats follow, not related to the main report
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)


@ -37,21 +37,44 @@ available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
# Take care to keep the order of queries -- sometimes we have DROP IF EXISTS
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates):
result = []
for q in query_templates:
def substitute_parameters(query_templates, other_templates = []):
query_results = []
other_results = [[] for _ in other_templates]  # independent lists, not aliases of one list
for i, q in enumerate(query_templates):
keys = set(n for _, n, _, _ in string.Formatter().parse(q) if n)
values = [available_parameters[k] for k in keys]
result.extend([
q.format(**dict(zip(keys, values_combo)))
for values_combo in itertools.product(*values)])
return result
combos = itertools.product(*values)
for c in combos:
with_keys = dict(zip(keys, c))
query_results.append(q.format(**with_keys))
for j, t in enumerate(other_templates):
other_results[j].append(t[i].format(**with_keys))
if len(other_templates):
return query_results, other_results
else:
return query_results
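As a hedged illustration of the new signature (the templates and parameter values below are made up for this sketch; perf.py actually builds `available_parameters` from the test XML), a call might look like:

```python
# Hypothetical inputs, for illustration only.
available_parameters = {'table': ['hits_10m', 'hits_100m']}

queries, [is_short] = substitute_parameters(
    ['SELECT count() FROM {table}'],  # query templates
    [['1']])                          # parallel 'short' attribute templates
# queries  == ['SELECT count() FROM hits_10m', 'SELECT count() FROM hits_100m']
# is_short == ['1', '1']  -- one substituted value per generated query
```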
# Build a list of test queries, substituting parameters to query templates,
# and reporting the queries marked as short.
test_queries = []
for e in root.findall('query'):
new_queries = []
if 'short' in e.attrib:
new_queries, [is_short] = substitute_parameters([e.text], [[e.attrib['short']]])
for i, s in enumerate(is_short):
# Don't print this if we only need to print the queries.
if eval(s) and not args.print_queries:
print(f'short\t{i + len(test_queries)}')
else:
new_queries = substitute_parameters([e.text])
test_queries += new_queries
# Build a list of test queries, processing all substitutions
test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates)
# If we're only asked to print the queries, do that and exit
if args.print_queries:
@ -166,7 +189,7 @@ for conn_index, c in enumerate(connections):
c.execute(q)
print(f'fill\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
# Run test queries
# Run test queries.
for query_index, q in enumerate(test_queries):
query_prefix = f'{test_name}.query{query_index}'


@ -196,6 +196,12 @@ if args.report == 'main':
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
slow_on_client_rows)
unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv')
error_tests += len(unmarked_short_rows)
printSimpleTable('Short queries not marked as short',
['New client time, s', 'Test', '#', 'Query'],
unmarked_short_rows)
def print_partial():
rows = tsvRows('report/partial-queries-report.tsv')
if not rows:


@ -13,7 +13,7 @@ The supported formats are:
| Format | Input | Output |
|-----------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✗ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
@ -143,7 +143,7 @@ SELECT * FROM nestedt FORMAT TSV
## TabSeparatedRaw {#tabseparatedraw}
Differs from `TabSeparated` format in that the rows are written without escaping.
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
When parsing with this format, tabs or linefeeds are not allowed in any field.
This format is also available under the name `TSVRaw`.
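To see why that restriction exists, here is a hedged Python sketch (illustrative only, not ClickHouse code) contrasting escaped `TabSeparated` output with raw output:

```python
value = 'a\tb\nc'  # a single field containing a tab and a linefeed

# TabSeparated escapes control characters, so the value stays one field:
escaped = value.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n')
print(escaped)            # a\tb\nc -- parses back to the original value

# TabSeparatedRaw writes bytes verbatim; a reader splitting on tabs and
# linefeeds would see extra field and row boundaries, corrupting the data:
print(value.split('\t'))  # ['a', 'b\nc'] -- the field is broken apart
```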


@ -22,14 +22,14 @@ Strings are compared by bytes. A shorter string is smaller than all strings that
## equals, a = b and a == b operator {#function-equals}
## notEquals, a != b and a \<\> b operator {#function-notequals}
## notEquals, a != b and a <> b operator {#function-notequals}
## less, \< operator {#function-less}
## less, < operator {#function-less}
## greater, \> operator {#function-greater}
## greater, > operator {#function-greater}
## lessOrEquals, \<= operator {#function-lessorequals}
## lessOrEquals, <= operator {#function-lessorequals}
## greaterOrEquals, \>= operator {#function-greaterorequals}
## greaterOrEquals, >= operator {#function-greaterorequals}
[Original article](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/) <!--hide-->


@ -22,9 +22,9 @@ SELECT [DISTINCT] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]


@ -11,3 +11,47 @@ toc_title: LIMIT
`n` and `m` must be non-negative integers.
If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause that explicitly sorts results, the choice of rows for the result may be arbitrary and non-deterministic.
## LIMIT ... WITH TIES modifier {#limit-with-ties}
When you set the `WITH TIES` modifier for `LIMIT n[,m]` and specify `ORDER BY expr_list`, the result contains the first `n` (or `n,m`) rows plus all rows whose `ORDER BY` field values equal those of the row at position `n` (for `LIMIT n`) or `m` (for `LIMIT n,m`).
This modifier can also be combined with the [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill).
For example, the following query
```sql
SELECT * FROM (
SELECT number%50 AS n FROM numbers(100)
) ORDER BY n LIMIT 0,5
```
returns
```text
┌─n─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
└───┘
```
but after applying the `WITH TIES` modifier
```sql
SELECT * FROM (
SELECT number%50 AS n FROM numbers(100)
) ORDER BY n LIMIT 0,5 WITH TIES
```
it returns a different set of rows
```text
┌─n─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
└───┘
```
because row number 6 has the same value "2" for field `n` as row number 5.
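The tie-extension rule can be modeled with a short Python sketch (a hedged illustration of the semantics above, not the server implementation):

```python
def limit_with_ties(sorted_rows, n, key=lambda row: row):
    """Model of LIMIT n WITH TIES over rows already sorted by `key`."""
    if n == 0 or n >= len(sorted_rows):
        return list(sorted_rows)
    cutoff = key(sorted_rows[n - 1])  # ORDER BY value at the limit boundary
    end = n
    # Keep extending the result while rows tie with the boundary row.
    while end < len(sorted_rows) and key(sorted_rows[end]) == cutoff:
        end += 1
    return sorted_rows[:end]

rows = sorted(i % 50 for i in range(100))  # same data as the example above
print(limit_with_ties(rows, 5))            # [0, 0, 1, 1, 2, 2]
```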


@ -69,3 +69,129 @@ If there is not enough RAM, it is possible to perform sorting in external memory
Running a query may use more memory than `max_bytes_before_external_sort`. For this reason, this setting must have a value significantly smaller than `max_memory_usage`. As an example, if your server has 128 GB of RAM and you need to run a single query, set `max_memory_usage` to 100 GB, and `max_bytes_before_external_sort` to 80 GB.
External sorting works much less effectively than sorting in RAM.
## ORDER BY expr WITH FILL modifier {#orderby-with-fill}
This modifier can also be combined with the [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties).
The `WITH FILL` modifier can be set after `ORDER BY expr`, with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missing values in the `expr` column are filled sequentially, and the other columns are filled with default values.
To fill multiple columns, add a `WITH FILL` modifier with its optional parameters after each field name in the `ORDER BY` section.
```sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` can be applied only to fields of numeric (any kind of float, decimal, or int) or Date/DateTime types.
When `FROM const_expr` is not defined, the fill sequence uses the minimum value of the `expr` field from `ORDER BY`.
When `TO const_expr` is not defined, the fill sequence uses the maximum value of the `expr` field from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted as-is for numeric types, as days for the Date type, and as seconds for the DateTime type.
When `STEP const_numeric_expr` is omitted, the fill sequence uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
For example, the following query
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
```
returns
```text
┌─n─┬─source───┐
│ 1 │ original │
│ 4 │ original │
│ 7 │ original │
└───┴──────────┘
```
but after applying the `WITH FILL` modifier
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
```
returns
```text
┌───n─┬─source───┐
│ 0 │ │
│ 0.5 │ │
│ 1 │ original │
│ 1.5 │ │
│ 2 │ │
│ 2.5 │ │
│ 3 │ │
│ 3.5 │ │
│ 4 │ original │
│ 4.5 │ │
│ 5 │ │
│ 5.5 │ │
│ 7 │ original │
└─────┴──────────┘
```
When there are multiple fields, as in `ORDER BY field2 WITH FILL, field1 WITH FILL`, the order of filling follows the order of the fields in the `ORDER BY` clause.
Example:
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d2 WITH FILL,
d1 WITH FILL STEP 5;
```
returns
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 0000-00-00 │ 1970-01-03 │ │
│ 0000-00-00 │ 1970-01-04 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 0000-00-00 │ 1970-01-06 │ │
│ 0000-00-00 │ 1970-01-07 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
Field `d1` is not filled and uses the default value, because there are no repeated values for `d2`, so the fill sequence for `d1` cannot be properly calculated.
The following query, with the field order in `ORDER BY` changed,
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d1 WITH FILL STEP 5,
d2 WITH FILL;
```
returns
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-16 │ 0000-00-00 │ │
│ 1970-01-21 │ 0000-00-00 │ │
│ 1970-01-26 │ 0000-00-00 │ │
│ 1970-01-31 │ 0000-00-00 │ │
│ 1970-02-05 │ 0000-00-00 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-02-15 │ 0000-00-00 │ │
│ 1970-02-20 │ 0000-00-00 │ │
│ 1970-02-25 │ 0000-00-00 │ │
│ 1970-03-02 │ 0000-00-00 │ │
│ 1970-03-07 │ 0000-00-00 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
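As a rough model of the single-column fill semantics described above (a hedged sketch; treating the `TO` bound as exclusive is an assumption that matches the `TO 5.51` example, not a documented guarantee):

```python
def with_fill(rows, start, stop, step):
    """Model ORDER BY n WITH FILL FROM start TO stop STEP step.

    `rows` maps sort-key values to source labels; keys missing from the
    sequence are emitted with default (empty) values for other columns.
    """
    present = dict(rows)
    out, n = [], start
    while n < stop:                      # assumed exclusive upper bound
        out.append((n, present.pop(n, '')))
        n = round(n + step, 10)          # avoid float drift for steps like 0.5
    out.extend(sorted(present.items()))  # originals beyond the fill range
    return out

data = [(1.0, 'original'), (4.0, 'original'), (7.0, 'original')]
for n, source in with_fill(data, start=0.0, stop=5.51, step=0.5):
    print(n, source)  # 0.0 .. 5.5 with gaps filled, then 7.0 original
```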


@ -20,9 +20,9 @@ SELECT [DISTINCT] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]


@ -20,9 +20,9 @@ SELECT [DISTINCT] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]


@ -20,9 +20,9 @@ SELECT [DISTINCT] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]


@ -18,9 +18,9 @@ SELECT [DISTINCT] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]


@ -1,3 +1,7 @@
---
toc_title: LIMIT
---
# LIMIT Clause {#limit-clause}
`LIMIT m` selects the first `m` rows from the result.
@ -7,3 +11,47 @@
`n` and `m` must be non-negative integers.
If there is no [ORDER BY](order-by.md) clause that unambiguously sorts the result, the choice of rows for the result may be arbitrary and non-deterministic.
## LIMIT ... WITH TIES modifier {#limit-with-ties}
When you set the `WITH TIES` modifier for `LIMIT n[,m]` and specify `ORDER BY expr_list`, the result contains the first `n` (or `n,m`) rows plus all rows whose `ORDER BY` field values equal those of the row at position `n` (for `LIMIT n`) or `m` (for `LIMIT n,m`).
This modifier can also be combined with the [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill).
For example, the following query
```sql
SELECT * FROM (
SELECT number%50 AS n FROM numbers(100)
) ORDER BY n LIMIT 0,5
```
returns
```text
┌─n─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
└───┘
```
but after applying the `WITH TIES` modifier
```sql
SELECT * FROM (
SELECT number%50 AS n FROM numbers(100)
) ORDER BY n LIMIT 0,5 WITH TIES
```
it returns a different set of rows
```text
┌─n─┐
│ 0 │
│ 0 │
│ 1 │
│ 1 │
│ 2 │
│ 2 │
└───┘
```
because row number 6 has the same value "2" for field `n` as row number 5.


@ -1,3 +1,7 @@
---
toc_title: ORDER BY
---
# ORDER BY Clause {#select-order-by}
The `ORDER BY` clause contains a list of expressions, each of which may be followed by `DESC` or `ASC` (the sort direction). If no direction is specified, `ASC` is assumed. `ASC` sorts in ascending order, `DESC` in descending order. The sort direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase`
@ -31,7 +35,7 @@
└───┴──────┘
```
Execution of the query `SELECT * FROM t_null_nan ORDER BY y NULLS FIRST` to get:
Run the query `SELECT * FROM t_null_nan ORDER BY y NULLS FIRST` to get:
``` text
┌─x─┬────y─┐
@ -66,3 +70,128 @@
External sorting works much less efficiently than sorting in RAM.
## ORDER BY expr WITH FILL modifier {#orderby-with-fill}
This modifier can also be combined with the [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties).
The `WITH FILL` modifier can be set after `ORDER BY expr`, with optional `FROM expr`, `TO expr` and `STEP expr` parameters.
All missing values in the `expr` column are filled sequentially, and the other columns are filled with default values.
To fill multiple columns, add a `WITH FILL` modifier with its optional parameters after each field name in the `ORDER BY` section.
```sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
```
`WITH FILL` can be applied only to fields of numeric (any kind of float, int, or decimal) or temporal (any kind of Date or DateTime) types.
When `FROM const_expr` is not defined, the fill sequence uses the minimum value of the `expr` field from `ORDER BY`.
When `TO const_expr` is not defined, the fill sequence uses the maximum value of the `expr` field from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted as-is for numeric types, as days for the Date type, and as seconds for the DateTime type.
When `STEP const_numeric_expr` is omitted, the fill sequence uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
For example, the following query
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n
```
returns
```text
┌─n─┬─source───┐
│ 1 │ original │
│ 4 │ original │
│ 7 │ original │
└───┴──────────┘
```
but after applying the `WITH FILL` modifier
```sql
SELECT n, source FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5
```
returns
```text
┌───n─┬─source───┐
│ 0 │ │
│ 0.5 │ │
│ 1 │ original │
│ 1.5 │ │
│ 2 │ │
│ 2.5 │ │
│ 3 │ │
│ 3.5 │ │
│ 4 │ original │
│ 4.5 │ │
│ 5 │ │
│ 5.5 │ │
│ 7 │ original │
└─────┴──────────┘
```
When there are multiple fields, as in `ORDER BY field2 WITH FILL, field1 WITH FILL`, the order of filling follows the order of the fields in the `ORDER BY` clause.
Example:
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d2 WITH FILL,
d1 WITH FILL STEP 5;
```
returns
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 0000-00-00 │ 1970-01-03 │ │
│ 0000-00-00 │ 1970-01-04 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 0000-00-00 │ 1970-01-06 │ │
│ 0000-00-00 │ 1970-01-07 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
Field `d1` is not filled and uses the default value, because there are no repeated values for `d2`, so the fill sequence for `d1` cannot be properly calculated.
The following query (with the field order in `ORDER BY` changed)
```sql
SELECT
toDate((number * 10) * 86400) AS d1,
toDate(number * 86400) AS d2,
'original' AS source
FROM numbers(10)
WHERE (number % 3) = 1
ORDER BY
d1 WITH FILL STEP 5,
d2 WITH FILL;
```
returns
```text
┌───d1───────┬───d2───────┬─source───┐
│ 1970-01-11 │ 1970-01-02 │ original │
│ 1970-01-16 │ 0000-00-00 │ │
│ 1970-01-21 │ 0000-00-00 │ │
│ 1970-01-26 │ 0000-00-00 │ │
│ 1970-01-31 │ 0000-00-00 │ │
│ 1970-02-05 │ 0000-00-00 │ │
│ 1970-02-10 │ 1970-01-05 │ original │
│ 1970-02-15 │ 0000-00-00 │ │
│ 1970-02-20 │ 0000-00-00 │ │
│ 1970-02-25 │ 0000-00-00 │ │
│ 1970-03-02 │ 0000-00-00 │ │
│ 1970-03-07 │ 0000-00-00 │ │
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```


@ -24,9 +24,9 @@ SELECT [DISTINCT] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m]
[LIMIT [n, ]m] [WITH TIES]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]


@ -370,6 +370,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(SettingBool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if their arguments are also in ORDER BY", 0) \
M(SettingBool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
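To illustrate what the new `optimize_redundant_functions_in_order_by` setting does, here is a hedged toy model in Python (the real logic is the C++ visitor later in this diff; nested function arguments like `f(h(x))` are omitted from this sketch):

```python
def prune_order_by(elements, is_deterministic=lambda fn: True):
    """Toy model: an element is 'x' (identifier) or ('f', ['x', ...])."""
    keys, kept = set(), []
    for elem in elements:
        if isinstance(elem, tuple):  # a function call f(args...)
            fn, args = elem
            # Redundant only if every argument is an identifier seen earlier.
            if args and all(a in keys for a in args) and is_deterministic(fn):
                continue             # drop f(x): x already orders the rows
        else:
            keys.add(elem)           # remember plain ORDER BY keys
        kept.append(elem)
    return kept

print(prune_order_by(['x', 'y', ('f', ['x']), ('g', ['x', 'y'])]))
# ['x', 'y'] -- i.e. ORDER BY x, y, f(x), g(x, y) becomes ORDER BY x, y
```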


@ -3,6 +3,7 @@ LIBRARY()
PEERDIR(
clickhouse/src/Common
contrib/libs/protobuf
contrib/libs/protoc
)
SRCS(


@ -689,6 +689,9 @@ static UInt64 getLimitForSorting(const ASTSelectQuery & query, const Context & c
if (!query.distinct && !query.limitBy() && !query.limit_with_ties && !query.arrayJoinExpressionList() && query.limitLength())
{
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
if (limit_length > std::numeric_limits<UInt64>::max() - limit_offset)
return 0;
return limit_length + limit_offset;
}
return 0;
@ -1287,6 +1290,7 @@ void InterpreterSelectQuery::executeFetchColumns(
&& !query.limitBy()
&& query.limitLength()
&& !query_analyzer->hasAggregation()
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset
&& limit_length + limit_offset < max_block_size)
{
max_block_size = std::max(UInt64(1), limit_length + limit_offset);
@ -1649,8 +1653,9 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, *context);
UInt64 limit_for_distinct = 0;
/// If after this stage of DISTINCT ORDER BY is not executed, then you can get no more than limit_length + limit_offset of different rows.
if (!query.orderBy() || !before_order)
/// If after this stage of DISTINCT ORDER BY is not executed,
/// then you can get no more than limit_length + limit_offset of different rows.
if ((!query.orderBy() || !before_order) && limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
limit_for_distinct = limit_length + limit_offset;
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
@ -1678,6 +1683,9 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
if (do_not_skip_offset)
{
if (limit_length > std::numeric_limits<UInt64>::max() - limit_offset)
return;
limit_length += limit_offset;
limit_offset = 0;
}
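The guards added above all follow one pattern: check `limit_length > std::numeric_limits<UInt64>::max() - limit_offset` before computing the sum, because unsigned addition silently wraps around. A small Python model of the arithmetic (Python integers don't wrap, so the modulo is made explicit):

```python
UINT64_MAX = 2**64 - 1

def safe_limit_sum(limit_length, limit_offset):
    """Mirror of the guard: None marks the case where the C++ code bails out."""
    if limit_length > UINT64_MAX - limit_offset:  # the sum would wrap
        return None
    return limit_length + limit_offset

print((UINT64_MAX + 5) % 2**64)       # 4 -- what unchecked UInt64 computes
print(safe_limit_sum(UINT64_MAX, 5))  # None -- guard catches the overflow
print(safe_limit_sum(10, 20))         # 30 -- normal case unaffected
```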


@ -0,0 +1,84 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Functions/FunctionFactory.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
class RedundantFunctionsInOrderByMatcher
{
public:
struct Data
{
std::unordered_set<String> & keys;
const Context & context;
bool redundant = true;
bool done = false;
void preventErase()
{
redundant = false;
done = true;
}
};
static void visit(const ASTPtr & ast, Data & data)
{
if (const auto * func = ast->as<ASTFunction>())
visit(*func, data);
}
static void visit(const ASTFunction & ast_function, Data & data)
{
if (data.done)
return;
bool is_lambda = (ast_function.name == "lambda");
const auto & arguments = ast_function.arguments;
bool has_arguments = arguments && !arguments->children.empty();
if (is_lambda || !has_arguments)
{
data.preventErase();
return;
}
/// If we meet a function as an argument, then we have already checked
/// its arguments and whether it can be erased
for (const auto & arg : arguments->children)
{
/// Allow functions: visit them later
if (arg->as<ASTFunction>())
continue;
/// Allow known identifiers: they are present in ORDER BY before current item
if (auto * identifier = arg->as<ASTIdentifier>())
if (data.keys.count(getIdentifierName(identifier)))
continue;
/// Reject erasing for anything else
data.preventErase();
return;
}
const auto function = FunctionFactory::instance().tryGet(ast_function.name, data.context);
if (!function || !function->isDeterministicInScopeOfQuery())
{
data.preventErase();
}
}
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{
return node->as<ASTFunction>();
}
};
using RedundantFunctionsInOrderByVisitor = ConstInDepthNodeVisitor<RedundantFunctionsInOrderByMatcher, true>;
}


@ -29,6 +29,7 @@
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/AnyInputOptimize.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -493,7 +494,7 @@ void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query)
}
/// Remove duplicate items from ORDER BY.
void optimizeOrderBy(const ASTSelectQuery * select_query)
void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
{
if (!select_query->orderBy())
return;
@ -528,6 +529,47 @@ void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, const Context & context
DuplicateDistinctVisitor(distinct_data).visit(query);
}
/// If ORDER BY has argument x followed by f(x), transform it to ORDER BY x.
/// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
/// in case if f(), g(), h(), t() are deterministic (in scope of query).
/// Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x).
void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, const Context & context)
{
const auto & order_by = select_query->orderBy();
if (!order_by)
return;
std::unordered_set<String> prev_keys;
ASTs modified;
modified.reserve(order_by->children.size());
for (auto & order_by_element : order_by->children)
{
/// ORDER BY contains ASTOrderByElement as its children, and the meaningful item only as a grandchild.
ASTPtr & name_or_function = order_by_element->children[0];
if (name_or_function->as<ASTFunction>())
{
if (!prev_keys.empty())
{
RedundantFunctionsInOrderByVisitor::Data data{prev_keys, context};
RedundantFunctionsInOrderByVisitor(data).visit(name_or_function);
if (data.redundant)
continue;
}
}
/// @note Leave duplicate keys unchanged. They would be removed in optimizeDuplicatesInOrderBy()
if (auto * identifier = name_or_function->as<ASTIdentifier>())
prev_keys.emplace(getIdentifierName(identifier));
modified.push_back(order_by_element);
}
if (modified.size() < order_by->children.size())
order_by->children = std::move(modified);
}
/// Remove duplicate items from LIMIT BY.
void optimizeLimitBy(const ASTSelectQuery * select_query)
{
@ -1014,12 +1056,16 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
optimizeAggregateFunctionsOfGroupByKeys(select_query);
/// Remove duplicate items from ORDER BY.
optimizeOrderBy(select_query);
optimizeDuplicatesInOrderBy(select_query);
/// Remove duplicate ORDER BY and DISTINCT from subqueries.
if (settings.optimize_duplicate_order_by_and_distinct)
optimizeDuplicateOrderByAndDistinct(query, context);
/// Remove functions from ORDER BY if their arguments are also in ORDER BY
if (settings.optimize_redundant_functions_in_order_by)
optimizeRedundantFunctionsInOrderBy(select_query, context);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);


@ -7,10 +7,11 @@
#include <IO/copyData.h>
#include <arrow/api.h>
#include <arrow/ipc/reader.h>
#include <arrow/status.h>
#include <arrow/result.h>
#include "ArrowBufferedStreams.h"
#include "ArrowColumnToCHColumn.h"
namespace DB
{
@ -30,13 +31,12 @@ Chunk ArrowBlockInputFormat::generate()
{
Chunk res;
const Block & header = getPort().getHeader();
std::vector<std::shared_ptr<arrow::RecordBatch>> single_batch(1);
arrow::Status read_status;
arrow::Result<std::shared_ptr<arrow::RecordBatch>> batch_result;
if (stream)
{
read_status = stream_reader->ReadNext(&single_batch[0]);
if (!single_batch[0])
batch_result = stream_reader->Next();
if (batch_result.ok() && !(*batch_result))
return res;
}
else
@ -44,22 +44,21 @@ Chunk ArrowBlockInputFormat::generate()
if (record_batch_current >= record_batch_total)
return res;
read_status = file_reader->ReadRecordBatch(record_batch_current, &single_batch[0]);
batch_result = file_reader->ReadRecordBatch(record_batch_current);
}
if (!read_status.ok())
throw Exception{"Error while reading batch of Arrow data: " + read_status.ToString(),
ErrorCodes::CANNOT_READ_ALL_DATA};
if (!batch_result.ok())
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
"Error while reading batch of Arrow data: {}", batch_result.status().ToString());
std::shared_ptr<arrow::Table> table;
arrow::Status make_status = arrow::Table::FromRecordBatches(single_batch, &table);
if (!make_status.ok())
throw Exception{"Error while reading table of Arrow data: " + read_status.ToString(),
ErrorCodes::CANNOT_READ_ALL_DATA};
auto table_result = arrow::Table::FromRecordBatches({*batch_result});
if (!table_result.ok())
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
"Error while reading table of Arrow data: {}", table_result.status().ToString());
++record_batch_current;
ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, header, "Arrow");
ArrowColumnToCHColumn::arrowTableToCHChunk(res, *table_result, header, "Arrow");
return res;
}
@ -77,15 +76,22 @@ void ArrowBlockInputFormat::resetParser()
void ArrowBlockInputFormat::prepareReader()
{
arrow::Status status;
if (stream)
status = arrow::ipc::RecordBatchStreamReader::Open(asArrowFile(in), &stream_reader);
{
auto stream_reader_status = arrow::ipc::RecordBatchStreamReader::Open(asArrowFile(in));
if (!stream_reader_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while opening a table: {}", stream_reader_status.status().ToString());
stream_reader = *stream_reader_status;
}
else
status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(in), &file_reader);
if (!status.ok())
throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
{
auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(in));
if (!file_reader_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while opening a table: {}", file_reader_status.status().ToString());
file_reader = *file_reader_status;
}
if (stream)
record_batch_total = -1;


@ -5,9 +5,11 @@
#include <Formats/FormatFactory.h>
#include <arrow/ipc/writer.h>
#include <arrow/table.h>
#include <arrow/result.h>
#include "ArrowBufferedStreams.h"
#include "CHColumnToArrowColumn.h"
namespace DB
{
namespace ErrorCodes
@ -35,7 +37,8 @@ void ArrowBlockOutputFormat::consume(Chunk chunk)
auto status = writer->WriteTable(*arrow_table, format_settings.arrow.row_group_size);
if (!status.ok())
throw Exception{"Error while writing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while writing a table: {}", status.ToString());
}
void ArrowBlockOutputFormat::finalize()
@ -44,22 +47,26 @@ void ArrowBlockOutputFormat::finalize()
{
auto status = writer->Close();
if (!status.ok())
throw Exception{"Error while closing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while closing a table: {}", status.ToString());
}
}
void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr<arrow::Schema> & schema)
{
arrow::Status status;
arrow::Result<std::shared_ptr<arrow::ipc::RecordBatchWriter>> writer_status;
// TODO: should we use arrow::ipc::IpcOptions::alignment?
if (stream)
status = arrow::ipc::RecordBatchStreamWriter::Open(arrow_ostream.get(), schema, &writer);
writer_status = arrow::ipc::NewStreamWriter(arrow_ostream.get(), schema);
else
status = arrow::ipc::RecordBatchFileWriter::Open(arrow_ostream.get(), schema, &writer);
writer_status = arrow::ipc::NewFileWriter(arrow_ostream.get(), schema);
if (!status.ok())
throw Exception{"Error while opening a table writer: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
if (!writer_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,
"Error while opening a table writer: {}", writer_status.status().ToString());
writer = *writer_status;
}
void registerOutputFormatProcessorArrow(FormatFactory & factory)


@ -7,10 +7,11 @@
#include <IO/copyData.h>
#include <arrow/buffer.h>
#include <arrow/io/api.h>
#include <arrow/status.h>
#include <arrow/result.h>
#include <sys/stat.h>
namespace DB
{
@ -65,12 +66,15 @@ arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbyt
arrow::Result<std::shared_ptr<arrow::Buffer>> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes)
{
std::shared_ptr<arrow::Buffer> buf;
ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(nbytes, &buf));
size_t n = in.readBig(reinterpret_cast<char *>(buf->mutable_data()), nbytes);
auto buffer_status = arrow::AllocateBuffer(nbytes);
ARROW_RETURN_NOT_OK(buffer_status);
auto read_buffer = arrow::SliceBuffer(buf, 0, n);
return arrow::Result<std::shared_ptr<arrow::Buffer>>(read_buffer);
auto shared_buffer = std::shared_ptr<arrow::Buffer>(std::move(*buffer_status));
size_t n = in.readBig(reinterpret_cast<char *>(shared_buffer->mutable_data()), nbytes);
auto read_buffer = arrow::SliceBuffer(shared_buffer, 0, n);
return arrow::Result<std::shared_ptr<arrow::Buffer>>(read_buffer);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)


@ -0,0 +1,59 @@
#pragma once
#include <Core/Block.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadBufferFromString.h>
#include <Processors/Formats/RowInputFormatWithDiagnosticInfo.h>
namespace DB
{
/** A stream for inputting data in TSV format, but without escaping individual values.
* It only supports columns without '\n' or '\t'.
*/
class TabSeparatedRawRowInputFormat : public TabSeparatedRowInputFormat
{
public:
/** with_names - the first line is a header with the column names
* with_types - the next line is a header with the type names
*/
TabSeparatedRawRowInputFormat(
const Block & header_,
ReadBuffer & in_,
const Params & params_,
bool with_names_,
bool with_types_,
const FormatSettings & format_settings_)
: TabSeparatedRowInputFormat(header_, in_, params_, with_names_, with_types_, format_settings_)
{
}
String getName() const override { return "TabSeparatedRawRowInputFormat"; }
bool readField(IColumn & column, const DataTypePtr & type, bool) override
{
String tmp;
while (!in.eof())
{
char * pos = find_first_symbols<'\n', '\t'>(in.position(), in.buffer().end());
tmp.append(in.position(), pos - in.position());
in.position() = pos;
if (pos == in.buffer().end())
in.next();
else
break;
}
ReadBufferFromString cell(tmp);
type->deserializeAsWholeText(column, cell, format_settings);
return true;
}
};
}


@ -3,6 +3,7 @@
#include <IO/Operators.h>
#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
#include <Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h>
#include <Formats/verbosePrintString.h>
#include <Formats/FormatFactory.h>
#include <DataTypes/DataTypeNothing.h>
@ -360,6 +361,18 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory)
});
}
for (const auto * name : {"TabSeparatedRaw", "TSVRaw"})
{
factory.registerInputFormatProcessor(name, [](
ReadBuffer & buf,
const Block & sample,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<TabSeparatedRawRowInputFormat>(sample, buf, params, false, false, settings);
});
}
for (const auto * name : {"TabSeparatedWithNames", "TSVWithNames"})
{
factory.registerInputFormatProcessor(name, [](


@ -28,10 +28,14 @@ public:
void resetParser() override;
private:
protected:
bool with_names;
bool with_types;
const FormatSettings format_settings;
virtual bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
private:
DataTypes data_types;
using IndexesMap = std::unordered_map<String, size_t>;
@ -43,8 +47,6 @@ private:
std::vector<UInt8> read_columns;
std::vector<size_t> columns_to_fill_with_default_values;
bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
void addInputColumn(const String & column_name);
void setupAllColumnsByTableSchema();
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension & row_read_extension);

View File

@ -10,7 +10,7 @@ namespace ErrorCodes
}
LimitTransform::LimitTransform(
const Block & header_, size_t limit_, size_t offset_, size_t num_streams,
const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams,
bool always_read_till_end_, bool with_ties_,
SortDescription description_)
: IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_))
@ -46,7 +46,7 @@ LimitTransform::LimitTransform(
}
}
Chunk LimitTransform::makeChunkWithPreviousRow(const Chunk & chunk, size_t row) const
Chunk LimitTransform::makeChunkWithPreviousRow(const Chunk & chunk, UInt64 row) const
{
assert(row < chunk.getNumRows());
ColumnRawPtrs current_columns = extractSortColumns(chunk.getColumns());
@ -93,7 +93,6 @@ IProcessor::Status LimitTransform::prepare(
throw Exception(
"Unexpected status for LimitTransform::preparePair : " + IProcessor::statusToName(status),
ErrorCodes::LOGICAL_ERROR);
}
};
@ -107,9 +106,12 @@ IProcessor::Status LimitTransform::prepare(
if (num_finished_port_pairs == ports_data.size())
return Status::Finished;
bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset);
/// If we reached the limit for some port, close the others. Otherwise some sources may keep reading data indefinitely.
/// Example: SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1
if ((rows_read >= offset + limit) && !previous_row_chunk && !always_read_till_end)
if ((!limit_is_unreachable && rows_read >= offset + limit)
&& !previous_row_chunk && !always_read_till_end)
{
for (auto & input : inputs)
input.close();
@ -158,8 +160,10 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data)
return Status::PortFull;
}
bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset);
/// Check if we are done with pushing.
bool is_limit_reached = (rows_read >= offset + limit) && !previous_row_chunk;
bool is_limit_reached = !limit_is_unreachable && rows_read >= offset + limit && !previous_row_chunk;
if (is_limit_reached)
{
if (!always_read_till_end)
@ -223,7 +227,8 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data)
return Status::NeedData;
}
if (rows_read >= offset + rows && rows_read <= offset + limit)
if (rows <= std::numeric_limits<UInt64>::max() - offset && rows_read >= offset + rows
&& !limit_is_unreachable && rows_read <= offset + limit)
{
/// Return the whole chunk.
@ -237,7 +242,7 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data)
bool may_need_more_data_for_ties = previous_row_chunk || rows_read - rows <= offset + limit;
/// No more data is needed.
if (!always_read_till_end && (rows_read >= offset + limit) && !may_need_more_data_for_ties)
if (!always_read_till_end && !limit_is_unreachable && rows_read >= offset + limit && !may_need_more_data_for_ties)
input.close();
output.push(std::move(data.current_chunk));
@ -249,13 +254,15 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data)
void LimitTransform::splitChunk(PortsData & data)
{
auto current_chunk_sort_columns = extractSortColumns(data.current_chunk.getColumns());
size_t num_rows = data.current_chunk.getNumRows();
size_t num_columns = data.current_chunk.getNumColumns();
UInt64 num_rows = data.current_chunk.getNumRows();
UInt64 num_columns = data.current_chunk.getNumColumns();
if (previous_row_chunk && rows_read >= offset + limit)
bool limit_is_unreachable = (limit > std::numeric_limits<UInt64>::max() - offset);
if (previous_row_chunk && !limit_is_unreachable && rows_read >= offset + limit)
{
/// Scan until the first row that is not equal to previous_row_chunk (for WITH TIES)
size_t current_row_num = 0;
UInt64 current_row_num = 0;
for (; current_row_num < num_rows; ++current_row_num)
{
if (!sortColumnsEqualAt(current_chunk_sort_columns, current_row_num))
@ -267,7 +274,7 @@ void LimitTransform::splitChunk(PortsData & data)
if (current_row_num < num_rows)
{
previous_row_chunk = {};
for (size_t i = 0; i < num_columns; ++i)
for (UInt64 i = 0; i < num_columns; ++i)
columns[i] = columns[i]->cut(0, current_row_num);
}
@ -276,19 +283,51 @@ void LimitTransform::splitChunk(PortsData & data)
}
/// return a piece of the block
size_t start = std::max(
static_cast<Int64>(0),
static_cast<Int64>(offset) - static_cast<Int64>(rows_read) + static_cast<Int64>(num_rows));
size_t length = std::min(
static_cast<Int64>(limit), std::min(
static_cast<Int64>(rows_read) - static_cast<Int64>(offset),
static_cast<Int64>(limit) + static_cast<Int64>(offset) - static_cast<Int64>(rows_read) + static_cast<Int64>(num_rows)));
UInt64 start = 0;
/// ------------[....(...).]
/// <----------------------> rows_read
/// <----------> num_rows
/// <---------------> offset
/// <---> start
assert(offset < rows_read);
if (offset + num_rows > rows_read)
start = offset + num_rows - rows_read;
/// ------------[....(...).]
/// <----------------------> rows_read
/// <----------> num_rows
/// <---------------> offset
/// <---> limit
/// <---> length
/// <---> start
/// Or:
/// -----------------(------[....)....]
/// <---------------------------------> rows_read
/// <---------> num_rows
/// <---------------> offset
/// <-----------> limit
/// <----> length
/// 0 = start
UInt64 length = num_rows - start;
if (!limit_is_unreachable && offset + limit < rows_read)
{
if (offset + limit < rows_read - num_rows)
length = 0;
else
length = offset + limit - (rows_read - num_rows) - start;
}
/// Check if other rows in the current block are equal to the last one within the limit
if (with_ties && length)
{
size_t current_row_num = start + length;
UInt64 current_row_num = start + length;
previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, current_row_num - 1);
for (; current_row_num < num_rows; ++current_row_num)
@ -308,7 +347,7 @@ void LimitTransform::splitChunk(PortsData & data)
auto columns = data.current_chunk.detachColumns();
for (size_t i = 0; i < num_columns; ++i)
for (UInt64 i = 0; i < num_columns; ++i)
columns[i] = columns[i]->cut(start, length);
data.current_chunk.setColumns(std::move(columns), length);
@ -324,7 +363,7 @@ ColumnRawPtrs LimitTransform::extractSortColumns(const Columns & columns) const
return res;
}
bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, size_t current_chunk_row_num) const
bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const
{
assert(current_chunk_sort_columns.size() == previous_row_chunk.getNumColumns());
size_t size = current_chunk_sort_columns.size();
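The recurring limit_is_unreachable guard in this file exists because offset + limit is an unchecked UInt64 sum: with a huge LIMIT it can wrap around and make rows_read >= offset + limit spuriously true. A standalone sketch of the overflow-safe check, together with the start/length computation from the diagrams above (the names are illustrative, not the exact member layout):

#include <cassert>
#include <cstdint>
#include <limits>

/// Overflow-safe form of "rows_read >= offset + limit".
bool limitReached(uint64_t rows_read, uint64_t offset, uint64_t limit)
{
    bool limit_is_unreachable = limit > std::numeric_limits<uint64_t>::max() - offset;
    return !limit_is_unreachable && rows_read >= offset + limit;
}

/// Slice of the current chunk to keep. rows_read already includes the
/// current chunk of num_rows rows, exactly as in splitChunk above.
void sliceBounds(uint64_t rows_read, uint64_t num_rows, uint64_t offset, uint64_t limit,
                 uint64_t & start, uint64_t & length)
{
    assert(offset < rows_read);
    start = 0;
    if (offset + num_rows > rows_read)    /// the offset ends inside this chunk
        start = offset + num_rows - rows_read;
    length = num_rows - start;
    bool limit_is_unreachable = limit > std::numeric_limits<uint64_t>::max() - offset;
    if (!limit_is_unreachable && offset + limit < rows_read)    /// the limit ends inside (or before) this chunk
    {
        if (offset + limit < rows_read - num_rows)
            length = 0;
        else
            length = offset + limit - (rows_read - num_rows) - start;
    }
}

/// Example: rows_read = 24, num_rows = 10, offset = 17, limit = 4
/// => start = 3, length = 4, i.e. rows 17..20 of the stream are kept.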

View File

@ -18,9 +18,9 @@ namespace DB
class LimitTransform : public IProcessor
{
private:
UInt64 limit;
UInt64 offset;
size_t limit;
size_t offset;
bool always_read_till_end;
bool with_ties;
@ -29,7 +29,7 @@ private:
Chunk previous_row_chunk; /// for WITH TIES, contains only sort columns
std::vector<size_t> sort_column_positions;
size_t rows_read = 0; /// including the last read block
UInt64 rows_read = 0; /// including the last read block
RowsBeforeLimitCounterPtr rows_before_limit_at_least;
/// State of port's pair.
@ -46,13 +46,13 @@ private:
std::vector<PortsData> ports_data;
size_t num_finished_port_pairs = 0;
Chunk makeChunkWithPreviousRow(const Chunk & current_chunk, size_t row_num) const;
Chunk makeChunkWithPreviousRow(const Chunk & current_chunk, UInt64 row_num) const;
ColumnRawPtrs extractSortColumns(const Columns & columns) const;
bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, size_t current_chunk_row_num) const;
bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const;
public:
LimitTransform(
const Block & header_, size_t limit_, size_t offset_, size_t num_streams = 1,
const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams = 1,
bool always_read_till_end_ = false, bool with_ties_ = false,
SortDescription description_ = {});

View File

@ -10,7 +10,7 @@ namespace ErrorCodes
}
OffsetTransform::OffsetTransform(
const Block & header_, size_t offset_, size_t num_streams)
const Block & header_, UInt64 offset_, size_t num_streams)
: IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_))
, offset(offset_)
{
@ -135,7 +135,7 @@ OffsetTransform::Status OffsetTransform::preparePair(PortsData & data)
rows_read += rows;
if (rows_read < offset)
if (rows_read <= offset)
{
data.current_chunk.clear();
@ -150,7 +150,7 @@ OffsetTransform::Status OffsetTransform::preparePair(PortsData & data)
return Status::NeedData;
}
if (!(rows_read >= offset + rows))
if (!(rows <= std::numeric_limits<UInt64>::max() - offset && rows_read >= offset + rows))
splitChunk(data);
output.push(std::move(data.current_chunk));
@ -161,22 +161,30 @@ OffsetTransform::Status OffsetTransform::preparePair(PortsData & data)
void OffsetTransform::splitChunk(PortsData & data) const
{
size_t num_rows = data.current_chunk.getNumRows();
size_t num_columns = data.current_chunk.getNumColumns();
UInt64 num_rows = data.current_chunk.getNumRows();
UInt64 num_columns = data.current_chunk.getNumColumns();
/// return a piece of the block
size_t start = std::max(
static_cast<Int64>(0),
static_cast<Int64>(offset) - static_cast<Int64>(rows_read) + static_cast<Int64>(num_rows));
size_t length = static_cast<Int64>(rows_read) - static_cast<Int64>(offset);
if (length == num_rows)
return;
UInt64 start = 0;
/// ------------[....(.....]
/// <----------------------> rows_read
/// <----------> num_rows
/// <---------------> offset
/// <---> start
assert(offset < rows_read);
if (offset + num_rows > rows_read)
start = offset + num_rows - rows_read;
else
return;
UInt64 length = num_rows - start;
auto columns = data.current_chunk.detachColumns();
for (size_t i = 0; i < num_columns; ++i)
for (UInt64 i = 0; i < num_columns; ++i)
columns[i] = columns[i]->cut(start, length);
data.current_chunk.setColumns(std::move(columns), length);
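The '<' to '<=' change in preparePair above fixes a boundary case: rows_read is updated before the check, so rows_read == offset means every row seen so far, including the whole current chunk, still falls under OFFSET and the chunk must be dropped. A reduced illustration of the boundary:

#include <cstdint>

/// Decide whether the current chunk is entirely consumed by OFFSET.
/// rows_read has already been incremented by this chunk's row count.
bool chunkFullySkipped(uint64_t rows_read, uint64_t offset)
{
    return rows_read <= offset;   /// with '<', the rows_read == offset chunk leaked through
}

/// Example with offset = 10 and chunks of 5 rows:
///   chunk 1: rows_read = 5   -> skipped
///   chunk 2: rows_read = 10  -> skipped (the boundary case the change addresses)
///   chunk 3: rows_read = 15  -> partially emitted via splitChunk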

View File

@ -13,10 +13,9 @@ namespace DB
class OffsetTransform : public IProcessor
{
private:
UInt64 offset;
UInt64 rows_read = 0; /// including the last read block
size_t offset;
size_t rows_read = 0; /// including the last read block
RowsBeforeLimitCounterPtr rows_before_limit_at_least;
/// State of port's pair.
@ -34,7 +33,7 @@ private:
size_t num_finished_port_pairs = 0;
public:
OffsetTransform(const Block & header_, size_t offset_, size_t num_streams = 1);
OffsetTransform(const Block & header_, UInt64 offset_, size_t num_streams = 1);
String getName() const override { return "Offset"; }

View File

@ -5,7 +5,7 @@
namespace DB
{
LimitByTransform::LimitByTransform(const Block & header, size_t group_length_, size_t group_offset_, const Names & columns)
LimitByTransform::LimitByTransform(const Block & header, UInt64 group_length_, UInt64 group_offset_, const Names & columns)
: ISimpleTransform(header, header, true)
, group_length(group_length_)
, group_offset(group_offset_)
@ -25,13 +25,13 @@ LimitByTransform::LimitByTransform(const Block & header, size_t group_length_, s
void LimitByTransform::transform(Chunk & chunk)
{
size_t num_rows = chunk.getNumRows();
UInt64 num_rows = chunk.getNumRows();
auto columns = chunk.detachColumns();
IColumn::Filter filter(num_rows);
size_t inserted_count = 0;
UInt64 inserted_count = 0;
for (size_t row = 0; row < num_rows; ++row)
for (UInt64 row = 0; row < num_rows; ++row)
{
UInt128 key(0, 0);
SipHash hash;
@ -42,9 +42,10 @@ void LimitByTransform::transform(Chunk & chunk)
hash.get128(key.low, key.high);
auto count = keys_counts[key]++;
if (count >= group_offset && count < group_length + group_offset)
if (count >= group_offset
&& (group_length > std::numeric_limits<UInt64>::max() - group_offset || count < group_length + group_offset))
{
inserted_count++;
++inserted_count;
filter[row] = 1;
}
else
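The condition rewritten above guards the LIMIT BY window the same way LimitTransform guards LIMIT: group_length + group_offset is a UInt64 sum that can wrap, and a wrapped sum must not reject rows that belong to the window. A reduced sketch of the per-group filter; Key and the map are simplified stand-ins for the UInt128 SipHash key and keys_counts:

#include <cstdint>
#include <limits>
#include <unordered_map>

using Key = uint64_t;   /// stand-in for the UInt128 hash key

/// Keep the row if its group's running count lies in
/// [group_offset, group_offset + group_length), computed overflow-safely.
bool keepRow(std::unordered_map<Key, uint64_t> & keys_counts, Key key,
             uint64_t group_length, uint64_t group_offset)
{
    uint64_t count = keys_counts[key]++;
    bool upper_bound_unreachable = group_length > std::numeric_limits<uint64_t>::max() - group_offset;
    return count >= group_offset
        && (upper_bound_unreachable || count < group_length + group_offset);
}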

View File

@ -10,7 +10,7 @@ namespace DB
class LimitByTransform : public ISimpleTransform
{
public:
LimitByTransform(const Block & header, size_t group_length_, size_t group_offset_, const Names & columns);
LimitByTransform(const Block & header, UInt64 group_length_, UInt64 group_offset_, const Names & columns);
String getName() const override { return "LimitByTransform"; }
@ -22,8 +22,8 @@ private:
MapHashed keys_counts;
std::vector<size_t> key_positions;
const size_t group_length;
const size_t group_offset;
const UInt64 group_length;
const UInt64 group_offset;
};
}

View File

@ -74,8 +74,9 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading
auto mutable_column = res_columns[i]->assumeMutable();
auto & res_offsets = assert_cast<ColumnArray &>(*mutable_column).getOffsets();
size_t start_offset = total_rows_read ? source_offsets[total_rows_read - 1] : 0;
for (size_t row = 0; row < rows_to_read; ++row)
res_offsets.push_back(source_offsets[total_rows_read + row]);
res_offsets.push_back(source_offsets[total_rows_read + row] - start_offset);
res_columns[i] = std::move(mutable_column);
}
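The fix above rebases array offsets: ColumnArray stores cumulative offsets, so when rows are copied starting from total_rows_read, each source offset has to be shifted down by start_offset, the total accumulated before the first copied row. A reduced illustration:

#include <cstdint>
#include <vector>

/// Copy rows [from, from + count) of a cumulative-offsets column into a
/// fresh column, rebasing the offsets so the destination starts at zero.
std::vector<uint64_t> copyOffsets(const std::vector<uint64_t> & source_offsets,
                                  size_t from, size_t count)
{
    uint64_t start_offset = from ? source_offsets[from - 1] : 0;
    std::vector<uint64_t> result;
    result.reserve(count);
    for (size_t row = 0; row < count; ++row)
        result.push_back(source_offsets[from + row] - start_offset);   /// the fix
    return result;
}

/// Example: source_offsets = {2, 5, 9} (array sizes 2, 3, 4), from = 1, count = 2
/// => start_offset = 2, result = {3, 7}, preserving the sizes 3 and 4.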

View File

@ -15,13 +15,11 @@ RabbitMQBlockInputStream::RabbitMQBlockInputStream(
StorageRabbitMQ & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const Context & context_,
const Names & columns,
Poco::Logger * log_)
const Names & columns)
: storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, context(context_)
, column_names(columns)
, log(log_)
, non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized())
, virtual_header(metadata_snapshot->getSampleBlockForColumns({"_exchange"}, storage.getVirtuals(), storage.getStorageID()))
{

View File

@ -8,6 +8,7 @@
namespace DB
{
class RabbitMQBlockInputStream : public IBlockInputStream
{
@ -16,8 +17,7 @@ public:
StorageRabbitMQ & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const Context & context_,
const Names & columns,
Poco::Logger * log_);
const Names & columns);
~RabbitMQBlockInputStream() override;
@ -32,9 +32,10 @@ private:
StorageMetadataPtr metadata_snapshot;
Context context;
Names column_names;
Poco::Logger * log;
bool finished = false, claimed = false;
const Block non_virtual_header, virtual_header;
bool finished = false;
bool claimed = false;
const Block non_virtual_header;
const Block virtual_header;
ConsumerBufferPtr buffer;
};

View File

@ -51,14 +51,14 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer(
, exchange_name(exchange_name_)
, routing_keys(routing_keys_)
, channel_id(channel_id_)
, log(log_)
, row_delimiter(row_delimiter_)
, bind_by_id(bind_by_id_)
, num_queues(num_queues_)
, exchange_type(exchange_type_)
, local_exchange(local_exchange_)
, local_default_exchange(local_exchange + "_" + ExchangeType::DIRECT)
, local_hash_exchange(local_exchange + "_" + ExchangeType::HASH)
, log(log_)
, row_delimiter(row_delimiter_)
, stopped(stopped_)
, messages(QUEUE_SIZE * num_queues)
{
@ -146,16 +146,18 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
* in the current case we use a hash exchange for binding to another exchange of some other type, which needs its own routing keys
* of other kinds: headers, patterns and string keys. This means that the hash property must be changed.
*/
AMQP::Table binding_arguments;
binding_arguments["hash-property"] = "message_id";
/// Declare exchange for sharding.
consumer_channel->declareExchange(local_hash_exchange, AMQP::consistent_hash, binding_arguments)
.onError([&](const char * message)
{
local_exchange_declared = false;
LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message);
});
AMQP::Table binding_arguments;
binding_arguments["hash-property"] = "message_id";
/// Declare exchange for sharding.
consumer_channel->declareExchange(local_hash_exchange, AMQP::consistent_hash, binding_arguments)
.onError([&](const char * message)
{
local_exchange_declared = false;
LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message);
});
}
/// Then bind client's exchange to sharding exchange (by keys, specified by the client):
@ -325,7 +327,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
* It is important at this moment to make sure that queue bindings are created before any publishing can happen because
* otherwise messages will be routed nowhere.
*/
while (!default_bindings_created && !default_bindings_error || (exchange_type_set && !bindings_created && !bindings_error))
while ((!default_bindings_created && !default_bindings_error) || (exchange_type_set && !bindings_created && !bindings_error))
{
iterateEventLoop();
}
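The parentheses added to the while condition above do not change the parse: && already binds tighter than ||, so both versions group the first two negations together. The change makes the grouping explicit and silences -Wlogical-op-parentheses. A compile-and-run check of the equivalence:

#include <cassert>

int main()
{
    for (int bits = 0; bits < 16; ++bits)
    {
        bool a = bits & 1, b = bits & 2, c = bits & 4, d = bits & 8;
        /// '&&' binds tighter than '||': these are the same expression.
        assert((!a && !b || (c && !d)) == ((!a && !b) || (c && !d)));
    }
    return 0;
}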

View File

@ -60,7 +60,6 @@ private:
Poco::Logger * log;
char row_delimiter;
bool stalled = false;
bool allowed = true;
const std::atomic<bool> & stopped;

View File

@ -75,11 +75,11 @@ StorageRabbitMQ::StorageRabbitMQ(
, exchange_type(exchange_type_)
, use_transactional_channel(use_transactional_channel_)
, log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")"))
, semaphore(0, num_consumers_)
, parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672))
, login_password(std::make_pair(
global_context.getConfigRef().getString("rabbitmq.username"),
global_context.getConfigRef().getString("rabbitmq.password")))
, parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672))
, semaphore(0, num_consumers_)
{
loop = std::make_unique<uv_loop_t>();
uv_loop_init(loop.get());
@ -158,7 +158,7 @@ Pipes StorageRabbitMQ::read(
for (size_t i = 0; i < num_created_consumers; ++i)
{
auto rabbit_stream = std::make_shared<RabbitMQBlockInputStream>(
*this, metadata_snapshot, context, column_names, log);
*this, metadata_snapshot, context, column_names);
auto converting_stream = std::make_shared<ConvertingBlockInputStream>(
rabbit_stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Name);
pipes.emplace_back(std::make_shared<SourceFromInputStream>(converting_stream));
@ -364,7 +364,7 @@ bool StorageRabbitMQ::streamToViews()
auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID());
for (size_t i = 0; i < num_created_consumers; ++i)
{
auto rabbit_stream = std::make_shared<RabbitMQBlockInputStream>(*this, metadata_snapshot, rabbitmq_context, column_names, log);
auto rabbit_stream = std::make_shared<RabbitMQBlockInputStream>(*this, metadata_snapshot, rabbitmq_context, column_names);
auto converting_stream = std::make_shared<ConvertingBlockInputStream>(rabbit_stream, sample_block, ConvertingBlockInputStream::MatchColumnsMode::Name);
streams.emplace_back(converting_stream);

View File

@ -40,14 +40,14 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
, login_password(login_password_)
, routing_key(routing_key_)
, exchange_name(exchange_ + "_direct")
, log(log_)
, num_queues(num_queues_)
, bind_by_id(bind_by_id_)
, num_queues(num_queues_)
, use_transactional_channel(use_transactional_channel_)
, payloads(QUEUE_SIZE * num_queues)
, log(log_)
, delim(delimiter)
, max_rows(rows_per_message)
, chunk_size(chunk_size_)
, payloads(QUEUE_SIZE * num_queues)
{
loop = std::make_unique<uv_loop_t>();
@ -187,19 +187,19 @@ void WriteBufferToRabbitMQProducer::finilizeProducer()
answer_received = true;
LOG_TRACE(log, "All messages were successfully published");
})
.onError([&](const char * message)
.onError([&](const char * message1)
{
answer_received = true;
wait_rollback = true;
LOG_TRACE(log, "Publishing not successful: {}", message);
LOG_TRACE(log, "Publishing not successful: {}", message1);
producer_channel->rollbackTransaction()
.onSuccess([&]()
{
wait_rollback = false;
})
.onError([&](const char * message)
.onError([&](const char * message2)
{
LOG_ERROR(log, "Failed to rollback transaction: {}", message);
LOG_ERROR(log, "Failed to rollback transaction: {}", message2);
wait_rollback = false;
});
});
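The constructor hunks in this file, in ReadBufferFromRabbitMQConsumer and in StorageRabbitMQ reorder member initializers to match declaration order. C++ initializes non-static members in declaration order no matter how the initializer list is written, so a mismatched list can silently read a not-yet-initialized member; -Wreorder is what flags it. A minimal illustration with made-up members:

#include <cstddef>

struct Producer
{
    size_t num_queues;         /// declared first, therefore initialized first
    size_t payloads_capacity;  /// declared second, therefore initialized second

    /// Fine: payloads_capacity reads num_queues, which is already set.
    /// Had the two declarations been swapped, payloads_capacity(16 * num_queues)
    /// would run first and read an indeterminate num_queues, even though it is
    /// written after num_queues(num_queues_) in this initializer list.
    explicit Producer(size_t num_queues_)
        : num_queues(num_queues_)
        , payloads_capacity(16 * num_queues)
    {
    }
};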

View File

@ -68,7 +68,6 @@ private:
const std::optional<char> delim;
const size_t max_rows;
const size_t chunk_size;
size_t count_mes = 0;
size_t rows = 0;
std::list<std::string> chunks;
};

View File

@ -72,6 +72,7 @@ const char * auto_contributors[] {
"Andrey Skobtsov",
"Andrey Urusov",
"Andy Yang",
"Anton Ivashkin",
"Anton Kobzev",
"Anton Okhitin",
"Anton Okulov",
@ -238,6 +239,7 @@ const char * auto_contributors[] {
"Korviakov Andrey",
"Kozlov Ivan",
"Kruglov Pavel",
"Kseniia Sumarokova",
"Leonardo Cecchi",
"Leopold Schabel",
"Lev Borodin",
@ -287,12 +289,14 @@ const char * auto_contributors[] {
"Michael Razuvaev",
"Michael Smitasin",
"Michal Lisowski",
"MicrochipQ",
"Mihail Fandyushin",
"Mikahil Nacharov",
"Mike F",
"Mikhail",
"Mikhail Fandyushin",
"Mikhail Filimonov",
"Mikhail Gaidamaka",
"Mikhail Korotov",
"Mikhail Malafeev",
"Mikhail Nacharov",
@ -388,6 +392,7 @@ const char * auto_contributors[] {
"Sjoerd Mulder",
"Slach",
"Snow",
"Sofia Antipushina",
"Stanislav Pavlovichev",
"Stas Pavlovichev",
"Stefan Thies",
@ -447,6 +452,7 @@ const char * auto_contributors[] {
"Vsevolod Orlov",
"Vxider",
"Vyacheslav Alipov",
"Wang Fenjin",
"Weiqing Xu",
"William Shallum",
"Winter Zhang",
@ -468,6 +474,7 @@ const char * auto_contributors[] {
"abyss7",
"achimbab",
"achulkov2",
"ageraab",
"akazz",
"akonyaev",
"akuzm",
@ -476,6 +483,7 @@ const char * auto_contributors[] {
"alex.lvxin",
"alexander kozhikhov",
"alexey-milovidov",
"amudong",
"andrei-karpliuk",
"andrewsg",
"anrodigina",
@ -488,12 +496,14 @@ const char * auto_contributors[] {
"avsharapov",
"benamazing",
"bgranvea",
"bharatnc",
"blazerer",
"bluebirddm",
"bobrovskij artemij",
"bseng",
"cekc",
"champtar",
"chengy8934",
"chenxing-xc",
"chenxing.xc",
"chertus",
@ -522,10 +532,12 @@ const char * auto_contributors[] {
"f1yegor",
"favstovol",
"felixoid",
"feng lv",
"fenglv",
"fessmage",
"filimonov",
"flow",
"flynn",
"foxxmary",
"frank",
"franklee",
@ -571,6 +583,7 @@ const char * auto_contributors[] {
"madianjun",
"maiha",
"malkfilipp",
"manmitya",
"maqroll",
"maxim",
"maxim-babenko",
@ -638,10 +651,12 @@ const char * auto_contributors[] {
"vicdashkov",
"vinity",
"vitstn",
"vivarum",
"vxider",
"vzakaznikov",
"wangchao",
"xPoSx",
"yhgcn",
"yonesko",
"zamulla",
"zhang2014",

1
tests/.gitignore vendored
View File

@ -1,4 +1,5 @@
*.result
*.diff
*.error
*.dump
test_data

View File

@ -6,7 +6,7 @@
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
Randomly selected entries from first 50000 rows of dataset. -->
<fill_query> INSERT INTO ips_v4 VALUES ('116.253.40.133')('183.247.232.58')('116.106.34.242')('111.56.27.171')('183.245.137.140')('183.212.25.70')('162.144.2.57')('111.4.229.190')('59.52.3.168')('115.11.21.200')('121.28.97.113')('111.46.39.248')('120.192.122.34')('113.56.44.105')('116.66.238.92')('67.22.254.206')('115.0.24.191')('182.30.107.86')('223.73.153.243')('115.159.103.38')('36.186.75.121')('111.56.188.125')('115.14.93.25')('211.97.110.141')('61.58.96.173')('203.126.212.37')('192.220.125.142')('115.22.20.223')('121.25.160.80')('117.150.98.199')('183.211.172.143')('180.244.18.143')('209.131.3.252')('220.200.1.22')('171.225.130.45')('115.4.78.200')('36.183.59.29')('218.42.159.17')('115.13.39.164')('142.254.161.133')('116.2.211.43')('36.183.126.25')('66.150.171.196')('104.149.148.137')('120.239.82.212')('111.14.182.156')('115.6.63.224')('153.35.83.233')('113.142.1.1')('121.25.82.29')('62.151.203.189')('104.27.46.146')('36.189.46.88')('116.252.54.207')('64.77.240.1')('142.252.102.78')('36.82.224.170')('117.33.191.217')('144.12.164.251')('122.10.93.66')('104.25.84.59')('111.4.242.106')('222.216.51.186')('112.33.13.212')('115.9.240.116')('171.228.0.153')('45.3.47.158')('69.57.193.230')('115.6.104.199')('104.24.237.140')('199.17.84.108')('120.193.17.57')('112.40.38.145')('67.55.90.43')('180.253.57.249')('14.204.253.158')('1.83.241.116')('202.198.37.147')('115.6.31.95')('117.32.14.179')('23.238.237.26')('116.97.76.104')('1.80.2.248')('59.50.185.152')('42.117.228.166')('119.36.22.147')('210.66.18.184')('115.19.192.159')('112.15.128.113')('1.55.138.211')('210.183.19.113')('42.115.43.114')('58.16.171.31')('171.234.78.185')('113.56.43.134')('111.53.182.225')('107.160.215.141')('171.229.231.90')('58.19.84.138')('36.79.88.107')</fill_query>
<fill_query>INSERT INTO ips_v4 VALUES ('116.253.40.133')('183.247.232.58')('116.106.34.242')('111.56.27.171')('183.245.137.140')('183.212.25.70')('162.144.2.57')('111.4.229.190')('59.52.3.168')('115.11.21.200')('121.28.97.113')('111.46.39.248')('120.192.122.34')('113.56.44.105')('116.66.238.92')('67.22.254.206')('115.0.24.191')('182.30.107.86')('223.73.153.243')('115.159.103.38')('36.186.75.121')('111.56.188.125')('115.14.93.25')('211.97.110.141')('61.58.96.173')('203.126.212.37')('192.220.125.142')('115.22.20.223')('121.25.160.80')('117.150.98.199')('183.211.172.143')('180.244.18.143')('209.131.3.252')('220.200.1.22')('171.225.130.45')('115.4.78.200')('36.183.59.29')('218.42.159.17')('115.13.39.164')('142.254.161.133')('116.2.211.43')('36.183.126.25')('66.150.171.196')('104.149.148.137')('120.239.82.212')('111.14.182.156')('115.6.63.224')('153.35.83.233')('113.142.1.1')('121.25.82.29')('62.151.203.189')('104.27.46.146')('36.189.46.88')('116.252.54.207')('64.77.240.1')('142.252.102.78')('36.82.224.170')('117.33.191.217')('144.12.164.251')('122.10.93.66')('104.25.84.59')('111.4.242.106')('222.216.51.186')('112.33.13.212')('115.9.240.116')('171.228.0.153')('45.3.47.158')('69.57.193.230')('115.6.104.199')('104.24.237.140')('199.17.84.108')('120.193.17.57')('112.40.38.145')('67.55.90.43')('180.253.57.249')('14.204.253.158')('1.83.241.116')('202.198.37.147')('115.6.31.95')('117.32.14.179')('23.238.237.26')('116.97.76.104')('1.80.2.248')('59.50.185.152')('42.117.228.166')('119.36.22.147')('210.66.18.184')('115.19.192.159')('112.15.128.113')('1.55.138.211')('210.183.19.113')('42.115.43.114')('58.16.171.31')('171.234.78.185')('113.56.43.134')('111.53.182.225')('107.160.215.141')('171.229.231.90')('58.19.84.138')('36.79.88.107')</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
@ -21,6 +21,7 @@
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<fill_query>insert into ips_v4 select * from ips_v4</fill_query>
<query tag='IPv4StringToNum'>SELECT count() FROM ips_v4 WHERE NOT ignore(IPv4StringToNum(materialize(ip))) SETTINGS max_threads=1</query>

View File

@ -1,5 +1,5 @@
<test>
<settings><max_threads>1</max_threads></settings>
<!-- The CAIDA UCSD IPv4 Routed /24 DNS Names Dataset - 20181130,
http://www.caida.org/data/active/ipv4_dnsnames_dataset.xml.
@ -22,6 +22,7 @@
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<fill_query>insert into ips_v6 select * from ips_v6</fill_query>
<query tag="IPv6StringToNum">SELECT count() FROM ips_v6 WHERE NOT ignore(IPv6StringToNum(materialize(ip)))</query>
<query tag="IPv6NumToString+IPv6StringToNum">SELECT count() FROM ips_v6 WHERE NOT ignore(IPv6NumToString(IPv6StringToNum(materialize(ip))))</query>

View File

@ -3,90 +3,90 @@
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>select min(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<!-- SocialAction is always empty in hits_100m_single, don't test it -->
<query>select min(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select min(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select max(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select any(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<query>select anyHeavy(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null</query>
<substitutions>
<substitution>
<name>group_scale</name>
<value>1000000</value>
</substitution>
</substitutions>
<query>select min(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<!-- SocialAction and SocialNetwork is always empty in hits_100m_single, don't test it -->
<query>select min(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<!-- ParamOrderID is almost always empty in hits_100m_single (3k nonempty rows), don't test it -->
<query>select min(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select min(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select max(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select any(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
<query>select anyHeavy(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
</test>

File diff suppressed because one or more lines are too long

View File

@ -1,13 +1,13 @@
<test>
<query>SELECT max(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000)</query>
<query>SELECT max(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(50000000)</query>
<query>SELECT min(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000)</query>
<query>SELECT min(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(50000000)</query>
<query>SELECT sum(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000)</query>
<query>SELECT sum(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(50000000)</query>
<query>SELECT min(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(500000)</query>
<query>SELECT min(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(50000000)</query>
<query>SELECT max(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(500000)</query>
<query>SELECT max(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(50000000)</query>
<query>SELECT max(((((number) * 10) * -2) * 3) * 2) + min(((((number) * 10) * -2) * 3) * 2) FROM numbers(500000)</query>
<query>SELECT max(((((number) * 10) * -2) * 3) * 2) + min(((((number) * 10) * -2) * 3) * 2) FROM numbers(50000000)</query>
</test>

View File

@ -1,6 +1,4 @@
<test>
<query>SELECT boundingRatio(number, number) FROM numbers(1000000)</query>
<query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(1000000)</query>
<query>SELECT boundingRatio(number, number) FROM numbers(100000000)</query>
<query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(100000000)</query>
</test>

View File

@ -1,10 +1,10 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
</preconditions>
<settings>
<max_threads>1</max_threads>
</settings>
<query>SELECT count() FROM test.hits WHERE NOT ignore(IPv4CIDRToRange(ClientIP, rand() % 33))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(IPv6CIDRToRange(ClientIP6, rand() % 33))</query>

View File

@ -1,4 +1,5 @@
<test>
<!-- FIXME this instability is abysmal, investigate the unstable queries -->
<test max_ignored_relative_change="1.5">
<settings>
<allow_suspicious_codecs>1</allow_suspicious_codecs>
</settings>
@ -31,18 +32,24 @@
<substitution>
<name>num_rows</name>
<values>
<value>1000000</value>
<value>20000000</value>
</values>
</substitution>
</substitutions>
<create_query>CREATE TABLE IF NOT EXISTS codec_{seq_type}_{type}_{codec} (n {type} CODEC({codec})) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();</create_query>
<create_query>
CREATE TABLE codec_{seq_type}_{type}_{codec} (n {type} CODEC({codec}))
ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple()
SETTINGS parts_to_delay_insert = 5000, parts_to_throw_insert = 5000;
</create_query>
<create_query>system stop merges</create_query>
<!-- Using limit to make query finite, allowing it to be run multiple times in a loop, reducing mean error -->
<query>INSERT INTO codec_seq_{type}_{codec} (n) SELECT number/pi() FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</query>
<query>INSERT INTO codec_mon_{type}_{codec} (n) SELECT number+sin(number) FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</query>
<query>INSERT INTO codec_rnd_{type}_{codec} (n) SELECT (intHash64(number) - 4294967295)/pi() FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</query>
<drop_query>system start merges</drop_query>
<drop_query>DROP TABLE IF EXISTS codec_{seq_type}_{type}_{codec}</drop_query>
</test>

View File

@ -31,7 +31,7 @@
<substitution>
<name>num_rows</name>
<values>
<value>1000000</value>
<value>20000000</value>
</values>
</substitution>
</substitutions>
@ -41,6 +41,7 @@
<fill_query>INSERT INTO codec_seq_{type}_{codec} (n) SELECT number/pi() FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</fill_query>
<fill_query>INSERT INTO codec_mon_{type}_{codec} (n) SELECT number+sin(number) FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</fill_query>
<fill_query>INSERT INTO codec_rnd_{type}_{codec} (n) SELECT (intHash64(number) - 4294967295)/pi() FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</fill_query>
<fill_query>optimize table codec_{seq_type}_{type}_{codec} settings optimize_throw_if_noop = 1</fill_query>
<query>SELECT count(n) FROM codec_{seq_type}_{type}_{codec} WHERE ignore(n) == 0 LIMIT {num_rows} SETTINGS max_threads=1</query>

View File

@ -33,7 +33,7 @@
<substitution>
<name>num_rows</name>
<values>
<value>1000000</value>
<value>20000000</value>
</values>
</substitution>
</substitutions>
@ -43,6 +43,7 @@
<fill_query>INSERT INTO codec_seq_{type}_{codec} (n) SELECT number FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</fill_query>
<fill_query>INSERT INTO codec_mon_{type}_{codec} (n) SELECT number*512+(intHash64(number)%512) FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</fill_query>
<fill_query>INSERT INTO codec_rnd_{type}_{codec} (n) SELECT intHash64(number) FROM system.numbers LIMIT {num_rows} SETTINGS max_threads=1</fill_query>
<fill_query>optimize table codec_{seq_type}_{type}_{codec} settings optimize_throw_if_noop = 1</fill_query>
<query>SELECT count(n) FROM codec_{seq_type}_{type}_{codec} WHERE ignore(n) == 0 LIMIT {num_rows} SETTINGS max_threads=1</query>

View File

@ -8,24 +8,23 @@
</preconditions>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE URL < URL]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE URL < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE URL < PageCharset]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE SearchPhrase < SearchPhrase SETTINGS max_threads = 2]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE SearchPhrase < SearchPhrase SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE SearchPhrase < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE SearchPhrase < PageCharset SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(SearchPhrase) AND SearchPhrase < SearchPhrase SETTINGS max_threads = 2]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(SearchPhrase) AND SearchPhrase < SearchPhrase SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(SearchPhrase) AND SearchPhrase < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(SearchPhrase) AND SearchPhrase < PageCharset SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE MobilePhoneModel < MobilePhoneModel SETTINGS max_threads = 1]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE MobilePhoneModel < MobilePhoneModel SETTINGS max_threads = 1]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE MobilePhoneModel < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE MobilePhoneModel < PageCharset SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(MobilePhoneModel) AND MobilePhoneModel < MobilePhoneModel SETTINGS max_threads = 1]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(MobilePhoneModel) AND MobilePhoneModel < MobilePhoneModel SETTINGS max_threads = 1]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(MobilePhoneModel) AND MobilePhoneModel < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE notEmpty(MobilePhoneModel) AND MobilePhoneModel < PageCharset SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE PageCharset < PageCharset SETTINGS max_threads = 2]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE PageCharset < PageCharset SETTINGS max_threads = 2]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE PageCharset < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE Title < Title]]></query>
<query short="1"><![CDATA[SELECT count() FROM hits_100m_single WHERE Title < Title]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE Title < URL]]></query>
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE Title < PageCharset]]></query>

View File

@ -1,7 +1,4 @@
<test>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore([[zero], [zero]])</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore([[], [zero]])</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore([[zero], [zero]])</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore([[], [zero]])</query>
</test>

View File

@ -1,35 +1,35 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(MobilePhoneModel, SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, 'Hello'))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat('World', SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(MobilePhoneModel, 'Hello'))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(PageCharset, 'a'))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(URL, URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(URL, SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(MobilePhoneModel, SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(URL, 'Hello'))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat('World', SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(MobilePhoneModel, 'Hello'))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(PageCharset, 'a'))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}', URL, URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}', URL, SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}', MobilePhoneModel, SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello', URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('World{{}}', SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello', MobilePhoneModel))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}a', PageCharset))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}{{}}', URL, URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}{{}}', URL, SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}{{}}', MobilePhoneModel, SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}Hello', URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('World{{}}', SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}Hello', MobilePhoneModel))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}a', PageCharset))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, URL, URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, SearchPhrase, MobilePhoneModel))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, 'Hello', URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat('Hello', SearchPhrase, 'World'))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat(MobilePhoneModel, 'Hello', PageCharset))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(concat('a', PageCharset, 'b'))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(URL, URL, URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(URL, SearchPhrase, MobilePhoneModel))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(URL, 'Hello', URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat('Hello', SearchPhrase, 'World'))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat(MobilePhoneModel, 'Hello', PageCharset))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(concat('a', PageCharset, 'b'))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}{{}}', URL, URL, URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}{{}}', URL, SearchPhrase, MobilePhoneModel))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello{{}}', URL, URL))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('Hello{{}}World', SearchPhrase))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello{{}}', MobilePhoneModel, PageCharset))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(format('a{{}}b', PageCharset))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}{{}}{{}}', URL, URL, URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}{{}}{{}}', URL, SearchPhrase, MobilePhoneModel))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}Hello{{}}', URL, URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('Hello{{}}World', SearchPhrase))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('{{}}Hello{{}}', MobilePhoneModel, PageCharset))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(format('a{{}}b', PageCharset))</query>
</test>

View File

@ -1,13 +1,11 @@
<test>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')]))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')]))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(if(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')]))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(multiIf(rand() % 2, [toDateTime('2019-02-04 01:24:31')], [toDate('2019-02-04')]))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(if(rand() % 2, toDateTime(rand()), toDate(rand())))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime(rand()), toDate(rand())))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(if(rand() % 2, [toDateTime(rand())], [toDate(rand())]))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(multiIf(rand() % 2, [toDateTime(rand())], [toDate(rand())]))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, toDateTime(rand()), toDate(rand())))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2, toDateTime(rand()), toDate(rand())))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(if(rand() % 2, [toDateTime(rand())], [toDate(rand())]))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(multiIf(rand() % 2, [toDateTime(rand())], [toDate(rand())]))</query>
</test>
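Note: these queries benchmark branch-type unification — mixing DateTime and Date in if/multiIf forces the Date branch to be converted to the common supertype. An illustrative check (not part of the test):

    SELECT toTypeName(if(rand() % 2, toDateTime('2019-02-04 01:24:31'), toDate('2019-02-04')));
    -- DateTime: the Date argument is promoted to the branches' common supertype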

View File

@ -1,7 +1,4 @@
<test>
<substitutions>
<substitution>
<name>hash_func</name>
@ -20,8 +17,8 @@
</substitution>
</substitutions>
<query>SELECT {hash_func}(number, {buckets}) FROM numbers(1000000) FORMAT Null</query>
<query short="{buckets} &lt; 10">SELECT {hash_func}(number, {buckets}) FROM numbers(10000000) FORMAT Null</query>
<!-- sumbur with high bucket numbers is very slow, so only test two buckets -->
<query>SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(1000000) FORMAT Null</query>
<query short="1">SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(10000000) FORMAT Null</query>
</test>
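Note: as the comment says, sumburConsistentHash gets much slower as the bucket count grows, hence only two buckets are exercised. A hedged illustration (the 32768-bucket value is hypothetical, not taken from the test):

    SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(10000000) FORMAT Null;     -- cheap
    SELECT sumburConsistentHash(toUInt32(number), 32768) FROM numbers(10000000) FORMAT Null; -- far slower, per the comment above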

View File

@ -1,13 +1,9 @@
<test>
<create_query>CREATE TABLE data(k UInt64, v UInt64) ENGINE = MergeTree ORDER BY k</create_query>
<fill_query>INSERT INTO data SELECT number, 1 from numbers(10000000)</fill_query>
<query tag='count_10M'>SELECT count() FROM data</query>
<query tag='count_10M' short='1'>SELECT count() FROM data</query>
<drop_query>DROP TABLE IF EXISTS data</drop_query>
</test>

View File

@ -52,17 +52,13 @@ PageCharset is also almost always non-empty, but its aver
<query>SELECT count() FROM hits_10m_single WHERE NOT ignore(cutQueryString(URL)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(cutQueryString(URL))</query>
<!-- 38. Different quantile calculation algorithms. -->
<query>SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_10m_single SETTINGS max_threads = 1</query>
<query>SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single</query>
<query>SELECT quantilesIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single SETTINGS max_threads = 1</query>
<!-- 39. Different quantile calculation algorithms. -->
<query>SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_10m_single SETTINGS max_threads = 1</query>
<query>SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single</query>
<query>SELECT quantilesTimingIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single SETTINGS max_threads = 1</query>
<!-- 40. Different quantile calculation algorithms. -->
<query>SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_10m_single SETTINGS max_threads = 1</query>
<query>SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single</query>
<query>SELECT quantilesExactIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single SETTINGS max_threads = 1</query>
<!-- 41. Different quantile calculation algorithms. -->
<query>SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_10m_single SETTINGS max_threads = 1</query>
<query>SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single</query>
<query>SELECT quantilesTDigestIf(0.5, 0.9)(SendTiming, SendTiming > 0) FROM hits_100m_single SETTINGS max_threads = 1</query>
<!-- 42. Different cardinality calculation algorithms. -->
<query>SELECT uniq(UserID) FROM hits_10m_single SETTINGS max_threads = 1</query>
<query>SELECT uniq(UserID) FROM hits_100m_single</query>

View File

@ -1,46 +1,60 @@
<test max_ignored_relative_change="1.0">
<substitutions>
<substitution>
<name>crypto_hash_func</name>
<name>hash_slow</name>
<values>
<value>MD5</value>
<value>SHA1</value>
<value>SHA224</value>
<value>SHA256</value>
<value>halfMD5</value>
</values>
</substitution>
<substitution>
<name>hash_fast</name>
<values>
<value>sipHash64</value>
<value>sipHash128</value>
</values>
</substitution>
<substitution>
<name>string</name>
<name>string_small</name>
<values>
<value>materialize('')</value>
<value>toString(1000000000+number)</value>
</values>
</substitution>
<substitution>
<name>string_slow</name>
<name>string_large</name>
<values>
<value>materialize('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris sollicitudin nisi ac erat mollis dapibus. Maecenas leo purus, bibendum eu erat eget, iaculis molestie tortor. Phasellus maximus odio nec mauris ultrices dictum. Morbi efficitur nisl eget congue mollis. Vestibulum pharetra diam vitae urna interdum, eget ultricies justo sollicitudin. Nunc sit amet purus id leo tempus dignissim. Donec ac lacus ut orci tempus scelerisque quis ultricies nibh. Nullam lobortis, erat ac ullamcorper interdum, odio nisl elementum quam, ut malesuada massa nunc eget quam. Nam suscipit neque quis sapien ultricies imperdiet. Maecenas augue libero, finibus tristique sagittis et, semper nec arcu. Morbi non tortor ultrices, sollicitudin justo sed, accumsan ligula. Nullam at ipsum in nibh auctor ullamcorper. Nullam laoreet neque id lorem condimentum tincidunt. Nullam vel orci nibh. Ut sit amet sem faucibus, fringilla orci at, lacinia enim. Mauris imperdiet ex id scelerisque eleifend. Ut tincidunt massa nibh, viverra pharetra metus')</value>
</values>
</substitution>
<substitution>
<name>table</name>
<name>numbers_small</name>
<values>
<value>numbers(100000)</value>
<value>numbers_mt(1000000)</value>
</values>
</substitution>
<substitution>
<name>numbers_medium</name>
<values>
<value>numbers(1000000)</value>
<value>numbers_mt(10000000)</value>
</values>
</substitution>
<substitution>
<name>table_slow</name>
<name>numbers_large</name>
<values>
<value>zeros(100000)</value>
<value>zeros_mt(1000000)</value>
<value>numbers(10000000)</value>
<value>numbers_mt(100000000)</value>
</values>
</substitution>
</substitutions>
<query>SELECT ignore({crypto_hash_func}({string})) FROM {table} FORMAT Null</query>
<query>SELECT ignore({crypto_hash_func}({string_slow})) FROM {table_slow} FORMAT Null</query>
<query>SELECT ignore({hash_slow}({string_small})) FROM {numbers_medium} FORMAT Null</query>
<query>SELECT ignore({hash_slow}({string_large})) FROM {numbers_small} FORMAT Null</query>
<query>SELECT ignore({hash_fast}({string_small})) FROM {numbers_large} FORMAT Null</query>
<query>SELECT ignore({hash_fast}({string_large})) FROM {numbers_medium} FORMAT Null</query>
</test>
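Note: the framework runs the Cartesian product of all substitution values, so the first query line above expands to one run per {hash_slow} x {string_small} x {numbers_medium} combination. One concrete instance, built only from values listed above:

    SELECT ignore(MD5(toString(1000000000 + number))) FROM numbers(1000000) FORMAT Null;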

View File

@ -1,10 +1,7 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toDate(toString(EventDate)))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(toDate(toString(EventDate)))</query>
</test>

View File

@ -1,15 +1,16 @@
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<settings>
<max_threads>1</max_threads>
</settings>
<create_query>CREATE TABLE dt (x DateTime) ENGINE = MergeTree ORDER BY tuple()</create_query>
<create_query>CREATE TABLE dt64 (x DateTime64(3)) ENGINE = MergeTree ORDER BY tuple()</create_query>
<fill_query>INSERT INTO dt SELECT EventTime FROM hits_100m_single</fill_query>
<fill_query>INSERT INTO dt64 SELECT toDateTime64(toDecimal64(toUInt64(EventTime), 3) + toDecimal64((rand() % 1000) / 1000, 3), 3) FROM hits_100m_single</fill_query>
<fill_query>INSERT INTO dt SELECT EventTime FROM hits_10m_single</fill_query>
<fill_query>INSERT INTO dt64 SELECT toDateTime64(toDecimal64(toUInt64(EventTime), 3) + toDecimal64((rand() % 1000) / 1000, 3), 3) FROM hits_10m_single</fill_query>
<query>SELECT count() FROM dt where not ignore(x)</query>

View File

@ -1,11 +1,18 @@
<test>
<preconditions>
<!--FIXME disabled this test until I fix it -->
<table_exists>definitely_no_such_table</table_exists>
</preconditions>
<settings>
<max_memory_usage>20G</max_memory_usage>
</settings>
<create_query>CREATE TABLE t (x UInt64, d32 Decimal32(3), d64 Decimal64(4), d128 Decimal128(5)) ENGINE = Memory</create_query>
<fill_query>INSERT INTO t SELECT number AS x, x AS d32, x AS d64, x d128 FROM numbers(1000000)</fill_query>
<!-- use fewer threads to save memory -->
<fill_query>INSERT INTO t SELECT number AS x, x % 1000000 AS d32, x AS d64, x d128 FROM numbers_mt(200000000) SETTINGS max_threads = 8</fill_query>
<drop_query>DROP TABLE IF EXISTS t</drop_query>
<query>SELECT min(d32), max(d32), argMin(x, d32), argMax(x, d32) FROM t</query>
<query>SELECT min(d64), max(d64), argMin(x, d64), argMax(x, d64) FROM t</query>
<query>SELECT min(d128), max(d128), argMin(x, d128), argMax(x, d128) FROM t</query>
@ -14,21 +21,21 @@
<query>SELECT avg(d64), sum(d64), sumWithOverflow(d64) FROM t</query>
<query>SELECT avg(d128), sum(d128), sumWithOverflow(d128) FROM t</query>
<query>SELECT uniq(d32), uniqCombined(d32), uniqExact(d32), uniqHLL12(d32) FROM t</query>
<query>SELECT uniq(d64), uniqCombined(d64), uniqExact(d64), uniqHLL12(d64) FROM t</query>
<query>SELECT uniq(d128), uniqCombined(d128), uniqExact(d128), uniqHLL12(d128) FROM t</query>
<query>SELECT uniq(d32), uniqCombined(d32), uniqExact(d32), uniqHLL12(d32) FROM t LIMIT 100000</query>
<query>SELECT uniq(d64), uniqCombined(d64), uniqExact(d64), uniqHLL12(d64) FROM t LIMIT 100000</query>
<query>SELECT uniq(d128), uniqCombined(d128), uniqExact(d128), uniqHLL12(d128) FROM t LIMIT 100000</query>
<query>SELECT median(d32), medianExact(d32), medianExactWeighted(d32, 2) FROM t</query>
<query>SELECT median(d64), medianExact(d64), medianExactWeighted(d64, 2) FROM t</query>
<query>SELECT median(d128), medianExact(d128), medianExactWeighted(d128, 2) FROM t</query>
<query>SELECT median(d32), medianExact(d32), medianExactWeighted(d32, 2) FROM t LIMIT 100000</query>
<query>SELECT median(d64), medianExact(d64), medianExactWeighted(d64, 2) FROM t LIMIT 100000</query>
<query>SELECT median(d128), medianExact(d128), medianExactWeighted(d128, 2) FROM t LIMIT 100000</query>
<query>SELECT quantile(d32), quantileExact(d32), quantileExactWeighted(d32, 2) FROM t</query>
<query>SELECT quantile(d64), quantileExact(d64), quantileExactWeighted(d64, 2) FROM t</query>
<query>SELECT quantile(d128), quantileExact(d128), quantileExactWeighted(d128, 2) FROM t</query>
<query>SELECT quantile(d32), quantileExact(d32), quantileExactWeighted(d32, 2) FROM t LIMIT 100000</query>
<query>SELECT quantile(d64), quantileExact(d64), quantileExactWeighted(d64, 2) FROM t LIMIT 100000</query>
<query>SELECT quantile(d128), quantileExact(d128), quantileExactWeighted(d128, 2) FROM t LIMIT 100000</query>
<query>SELECT quantilesExact(0.1, 0.9)(d32), quantilesExactWeighted(0.1, 0.9)(d32, 2) FROM t</query>
<query>SELECT quantilesExact(0.1, 0.9)(d64), quantilesExactWeighted(0.1, 0.9)(d64, 2) FROM t</query>
<query>SELECT quantilesExact(0.1, 0.9)(d128), quantilesExactWeighted(0.1, 0.9)(d128, 2) FROM t</query>
<query>SELECT quantilesExact(0.1, 0.9)(d32), quantilesExactWeighted(0.1, 0.9)(d32, 2) FROM t LIMIT 100000</query>
<query>SELECT quantilesExact(0.1, 0.9)(d64), quantilesExactWeighted(0.1, 0.9)(d64, 2) FROM t LIMIT 100000</query>
<query>SELECT quantilesExact(0.1, 0.9)(d128), quantilesExactWeighted(0.1, 0.9)(d128, 2) FROM t LIMIT 100000</query>
<query>SELECT varPop(d32), varSamp(d32), stddevPop(d32) FROM t</query>
<query>SELECT varPop(d64), varSamp(d64), stddevPop(d64) FROM t</query>

View File

@ -1,10 +1,11 @@
<test>
<preconditions>
<table_exists>hits_10m_single</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<settings><max_threads>1</max_threads></settings>
<!-- FIXME this should have been an EXPLAIN test, no point in measuring performance to deduce that the query was rewritten -->
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null</query>
<query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null</query>
<query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null</query>
</test>
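Note: the FIXME suggests verifying the rewrite directly rather than timing it. A sketch of such a check, assuming EXPLAIN SYNTAX is available in this build:

    EXPLAIN SYNTAX
    SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC)
    ORDER BY EventDate, CounterID;
    -- if the duplicate-ORDER-BY optimization fired, the inner ORDER BY should be gone from the rewritten query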

View File

@ -1,10 +1,9 @@
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<settings><max_threads>1</max_threads></settings>
<!-- FIXME this should have been an EXPLAIN test -->
<query>SELECT count(JavaEnable) FROM hits_100m_single WHERE WatchID = 1 OR Title = 'next' OR URL = 'prev' OR OriginalURL = '???' OR 1</query>
</test>
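Note: same idea as the previous FIXME — the trailing OR 1 makes the WHERE trivially true, and an EXPLAIN-style check (same assumption as above) would show directly whether the condition is folded away, instead of inferring it from timings:

    EXPLAIN SYNTAX
    SELECT count(JavaEnable) FROM hits_100m_single
    WHERE WatchID = 1 OR Title = 'next' OR URL = 'prev' OR OriginalURL = '???' OR 1;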

View File

@ -1,14 +1,12 @@
<test>
<!-- gcc-8 generates 20% faster code than gcc-9;
clang-8 generates code that is more than twice as slow as gcc's
-->
<create_query>CREATE TABLE empty_strings (s String) ENGINE = Log;</create_query>
<fill_query>INSERT INTO empty_strings SELECT '' FROM zeros_mt(100000000);</fill_query>
<fill_query>INSERT INTO empty_strings SELECT '' FROM zeros_mt(1000000000);</fill_query>
<query>SELECT count() FROM empty_strings</query>
<query>SELECT count() FROM empty_strings WHERE NOT ignore(s)</query>
<drop_query>DROP TABLE IF EXISTS empty_strings</drop_query>
</test>

View File

@ -1,27 +1,16 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
<table_exists>hits_10m_single</table_exists>
</preconditions>
<substitutions>
<substitution>
<name>args</name>
<values>
<value>SearchEngineID</value>
<value>SearchPhrase</value>
<value>MobilePhoneModel</value>
<value>URL</value>
<value>URLDomain</value>
<value>URL, URLDomain</value>
<value>ClientIP</value>
<value>RegionID</value>
<value>ClientIP, RegionID</value>
</values>
</substitution>
</substitutions>
<query>SELECT entropy({args}) FROM test.hits</query>
<query>SELECT entropy(SearchEngineID) FROM hits_100m_single settings max_threads = 1</query>
<query>SELECT entropy(SearchPhrase) FROM hits_10m_single</query>
<query>SELECT entropy(MobilePhoneModel) FROM hits_100m_single</query>
<query>SELECT entropy(URL) FROM hits_10m_single</query>
<query>SELECT entropy(URLHash) FROM hits_10m_single</query>
<query>SELECT entropy(URL, URLHash) FROM hits_10m_single</query>
<query>SELECT entropy(ClientIP) FROM hits_10m_single</query>
<query>SELECT entropy(RegionID) FROM hits_100m_single settings max_threads = 1</query>
<query>SELECT entropy(ClientIP, RegionID) FROM hits_10m_single</query>
</test>

View File

@ -7,8 +7,7 @@
<table_exists>test.hits</table_exists>
</preconditions>
<settings><max_threads>1</max_threads></settings>
<query><![CDATA[SELECT count() FROM test.hits WHERE ClientIP6 < RemoteIP6]]></query>
</test>

View File

@ -22,6 +22,6 @@
</substitution>
</substitutions>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(toFloat64({expr}))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(toFloat64({expr_zero}))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(toFloat64({expr}))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(toFloat64({expr_zero}))</query>
</test>

View File

@ -1,9 +1,4 @@
<test>
<tags>
</tags>
<substitutions>
<substitution>
<name>format</name>
@ -18,5 +13,5 @@
</substitution>
</substitutions>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, formatDateTime(t, '{format}'))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, formatDateTime(t, '{format}'))</query>
</test>

View File

@ -1,8 +1,6 @@
<test>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(MACNumToString(number))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(MACStringToNum(MACNumToString(number)))</query>
<query>SELECT count() FROM zeros_mt(10000000) WHERE NOT ignore(MACNumToString(rand64()))</query>
<query>SELECT count() FROM zeros_mt(10000000) WHERE NOT ignore(MACStringToNum(MACNumToString(rand64())))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(MACNumToString(number))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(MACStringToNum(MACNumToString(number)))</query>
<query>SELECT count() FROM zeros_mt(1000000000) WHERE NOT ignore(MACNumToString(rand64()))</query>
<query>SELECT count() FROM zeros_mt(1000000000) WHERE NOT ignore(MACStringToNum(MACNumToString(rand64())))</query>
</test>

View File

@ -1,21 +1,21 @@
<test>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(UUID))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i FixedString(4)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i String', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(UUID))', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i FixedString(4)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i String', 0, 10, 10) LIMIT 1000000000);</query>
</test>
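Note: for readers unfamiliar with the repeated numeric arguments — after the schema string, generateRandom takes a random seed, a maximum string length, and a maximum array length, so "0, 10, 10" caps strings at 10 bytes and arrays at 10 elements. For example:

    SELECT * FROM generateRandom('i Array(String)', 0, 10, 10) LIMIT 3;
    -- rows with arrays of up to 10 strings, each up to 10 bytes long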

View File

@ -1,8 +1,6 @@
<test>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(rand() % 2 ? 'hello' : 'world')</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(rand() % 2 ? 'hello' : '')</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(rand() % 2 ? toFixedString('hello', 5) : toFixedString('world', 5))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(rand() % 2 ? '' : toFixedString('world', 5))</query>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(rand() % 2 ? 'hello' : 'world')</query>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(rand() % 2 ? 'hello' : '')</query>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(rand() % 2 ? toFixedString('hello', 5) : toFixedString('world', 5))</query>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(rand() % 2 ? '' : toFixedString('world', 5))</query>
</test>

View File

@ -1,13 +1,10 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? URL : Referer)</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? URL : '')</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? SearchPhrase : MobilePhoneModel)</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(rand() % 2 ? '' : PageCharset)</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(rand() % 2 ? URL : Referer)</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(rand() % 2 ? URL : '')</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(rand() % 2 ? SearchPhrase : MobilePhoneModel)</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(rand() % 2 ? '' : PageCharset)</query>
</test>

View File

@ -1,9 +1,9 @@
<test>
<query><![CDATA[ WITH number AS x SELECT sum(x < 1 ? 1 : (x < 5 ? 2 : 3)) FROM numbers(1000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT any(x < 1 ? '1' : (x < 5 ? '2' : '3')) FROM numbers(1000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT sum(x < 1 ? 1 : (x < 5 ? 2 : (x < 10 ? 3 : (x % 2 ? 4 : 5)))) FROM numbers(1000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT any(x < 1 ? '1' : (x < 5 ? '2' : (x < 10 ? '3' : (x % 2 ? '4' : '5')))) FROM numbers(1000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT sum(x < 1 ? 1 : (x < 5 ? 2 : 3)) FROM numbers(100000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT any(x < 1 ? '1' : (x < 5 ? '2' : '3')) FROM numbers(100000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT sum(x < 1 ? 1 : (x < 5 ? 2 : (x < 10 ? 3 : (x % 2 ? 4 : 5)))) FROM numbers(100000000) ]]></query>
<query><![CDATA[ WITH number AS x SELECT any(x < 1 ? '1' : (x < 5 ? '2' : (x < 10 ? '3' : (x % 2 ? '4' : '5')))) FROM numbers(100000000) ]]></query>
<query><![CDATA[
WITH number AS x, x = 1 ? 1 : (x = 2 ? 2 : (x = 3 ? 3 : (x = 4 ? 4 : (x = 5 ? 5 : (x = 6 ? 6 : (x = 7 ? 7 : (x = 8 ? 8 : (x = 9 ? 9 : (x = 10 ? 10 : (x = 11 ? 11 : (x = 12 ? 12 : (x = 13 ? 13 : (x = 14 ? 14 : (x = 15 ? 15 : (x = 16 ? 16 : (x = 17 ? 17 : (x = 18 ? 18 : (x = 19 ? 19 : 20)))))))))))))))))) AS res SELECT sum(res) FROM numbers(1000000)
WITH number AS x, x = 1 ? 1 : (x = 2 ? 2 : (x = 3 ? 3 : (x = 4 ? 4 : (x = 5 ? 5 : (x = 6 ? 6 : (x = 7 ? 7 : (x = 8 ? 8 : (x = 9 ? 9 : (x = 10 ? 10 : (x = 11 ? 11 : (x = 12 ? 12 : (x = 13 ? 13 : (x = 14 ? 14 : (x = 15 ? 15 : (x = 16 ? 16 : (x = 17 ? 17 : (x = 18 ? 18 : (x = 19 ? 19 : 20)))))))))))))))))) AS res SELECT sum(res) FROM numbers(10000000)
]]></query>
</test>

View File

@ -1,8 +1,4 @@
<test>
<query>
WITH
bitXor(number, 0x4CF2D2BAAE6DA887) AS x0,
@ -11,7 +7,7 @@
bitXor(x2, bitShiftRight(x2, 33)) AS x3,
x3 * 0xc4ceb9fe1a85ec53 AS x4,
bitXor(x4, bitShiftRight(x4, 33)) AS x5
SELECT count() FROM numbers(10000000) WHERE NOT ignore(x5)
SELECT count() FROM numbers(100000000) WHERE NOT ignore(x5)
SETTINGS
compile_expressions = 0
</query>
@ -24,7 +20,7 @@
bitXor(x2, bitShiftRight(x2, 33)) AS x3,
x3 * 0xc4ceb9fe1a85ec53 AS x4,
bitXor(x4, bitShiftRight(x4, 33)) AS x5
SELECT count() FROM numbers(10000000) WHERE NOT ignore(x5)
SELECT count() FROM numbers(100000000) WHERE NOT ignore(x5)
SETTINGS
compile_expressions = 1,
min_count_to_compile_expression = 1
@ -32,6 +28,6 @@
<!-- The same expression written as a ClickHouse built-in function. -->
<query>
SELECT count() FROM numbers(10000000) WHERE NOT ignore(intHash64(number))
SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash64(number))
</query>
</test>

View File

@ -1,9 +1,9 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(least(URL, Referer))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(greatest(URL, Referer, Title))</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(greatest(ClientIP, RemoteIP))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(least(URL, Referer))</query>
<query>SELECT count() FROM hits_10m_single WHERE NOT ignore(greatest(URL, Referer, Title))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(greatest(ClientIP, RemoteIP))</query>
</test>

View File

@ -2,12 +2,13 @@
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<settings><max_threads>1</max_threads></settings>
<query><![CDATA[SELECT max(length(MobilePhoneModel)) FROM hits_100m_single]]></query>
<query><![CDATA[SELECT max(length(Params)) FROM hits_100m_single]]></query>
<query><![CDATA[SELECT max(length(Title)) FROM hits_100m_single]]></query>
<query><![CDATA[SELECT max(length(Title)) FROM hits_10m_single]]></query>
<query><![CDATA[SELECT max(length(PageCharset)) FROM hits_100m_single]]></query>
<query><![CDATA[SELECT max(length(Referer)) FROM hits_100m_single]]></query>
<query><![CDATA[SELECT max(length(URL)) FROM hits_100m_single]]></query>
<query><![CDATA[SELECT max(length(Referer)) FROM hits_10m_single]]></query>
<query><![CDATA[SELECT max(length(URL)) FROM hits_10m_single]]></query>
<query><![CDATA[SELECT max(length(UTMSource)) FROM hits_100m_single]]></query>
</test>

View File

@ -1,6 +1,4 @@
<test>
<create_query>
CREATE TABLE huge_pk ENGINE = MergeTree ORDER BY (
c001, c002, c003, c004, c005, c006, c007, c008, c009, c010, c011, c012, c013, c014, c015, c016, c017, c018, c019, c020,
@ -185,11 +183,11 @@
</create_query>
<!-- some queries with PK conditions -->
<query><![CDATA[SELECT count() FROM huge_pk WHERE c001 > 10]]></query>
<query><![CDATA[SELECT count() FROM huge_pk WHERE c001 in (2,3) and c400 in (10,0) and c100 < 2]]></query>
<query><![CDATA[SELECT count() FROM huge_pk WHERE c700 > 10]]></query>
<query short="1"><![CDATA[SELECT count() FROM huge_pk WHERE c001 > 10]]></query>
<query short="1"><![CDATA[SELECT count() FROM huge_pk WHERE c001 in (2,3) and c400 in (10,0) and c100 < 2]]></query>
<query short="1"><![CDATA[SELECT count() FROM huge_pk WHERE c700 > 10]]></query>
<!-- column c701 is not in PK-->
<query><![CDATA[SELECT count() FROM huge_pk WHERE c701 > 10]]></query>
<query short="1"><![CDATA[SELECT count() FROM huge_pk WHERE c701 > 10]]></query>
<drop_query>DROP TABLE IF EXISTS huge_pk</drop_query>
</test>

View File

@ -1,15 +1,12 @@
<test>
<create_query>CREATE TABLE bad_partitions (x UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x</create_query>
<fill_query>INSERT INTO bad_partitions SELECT * FROM numbers(10000)</fill_query>
<settings>
<max_partitions_per_insert_block>0</max_partitions_per_insert_block>
</settings>
<query>SELECT count() FROM bad_partitions</query>
<query short="1">SELECT count() FROM bad_partitions</query>
<drop_query>DROP TABLE IF EXISTS bad_partitions</drop_query>
</test>

View File

@ -1,12 +1,9 @@
<test>
<create_query>CREATE TABLE simple_mergetree (EventDate Date, x UInt64) ENGINE = MergeTree ORDER BY x</create_query>
<fill_query>INSERT INTO simple_mergetree SELECT number, today() + intDiv(number, 10000000) FROM numbers_mt(100000000)</fill_query>
<fill_query>OPTIMIZE TABLE simple_mergetree FINAL</fill_query>
<query>SELECT count() FROM simple_mergetree</query>
<query short="1">SELECT count() FROM simple_mergetree</query>
<drop_query>DROP TABLE IF EXISTS simple_mergetree</drop_query>
</test>

View File

@ -12,7 +12,7 @@
<query>SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 10000 FORMAT Null</query>
<query>SELECT number AS n FROM numbers_mt(200000000) ORDER BY n DESC LIMIT 65535 FORMAT Null</query>
<query>SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 10 FORMAT Null</query>
<query>SELECT intHash64(number) AS n FROM numbers_mt(500000000) ORDER BY n LIMIT 10 FORMAT Null</query>
<query>SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 100 FORMAT Null</query>
<query>SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 1500 FORMAT Null</query>
<query>SELECT intHash64(number) AS n FROM numbers_mt(200000000) ORDER BY n LIMIT 3000 FORMAT Null</query>

File diff suppressed because one or more lines are too long

View File

@ -1,9 +1,6 @@
<test>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomFixedString(10))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomFixedString(100))</query>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomFixedString(10))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomFixedString(100))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomFixedString(1000))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(randomFixedString(10000))</query>
</test>

View File

@ -1,9 +1,6 @@
<test>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomPrintableASCII(10))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomPrintableASCII(100))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(10))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(100))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomPrintableASCII(1000))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(randomPrintableASCII(10000))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(rand() % 10))</query>

View File

@ -1,9 +1,6 @@
<test>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(10))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(100))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(10))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(100))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomString(1000))</query>
<query>SELECT count() FROM zeros(10000) WHERE NOT ignore(randomString(10000))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(rand() % 10))</query>

View File

@ -2,31 +2,27 @@
<settings>
<optimize_aggregation_in_order>1</optimize_aggregation_in_order>
<optimize_read_in_order>1</optimize_read_in_order>
<max_partitions_per_insert_block>200</max_partitions_per_insert_block>
<max_partitions_per_insert_block>2000</max_partitions_per_insert_block>
<max_threads>8</max_threads>
</settings>
<substitutions>
<substitution>
<name>table</name>
<name>parts</name>
<values>
<value>mt_20_parts</value>
<value>mt_200_parts</value>
<value>100</value>
<value>1000</value>
</values>
</substitution>
</substitutions>
<create_query>CREATE TABLE mt_20_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 20</create_query>
<create_query>CREATE TABLE mt_200_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 200</create_query>
<create_query>CREATE TABLE mt_{parts}_parts(id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % {parts}</create_query>
<fill_query>INSERT INTO mt_{parts}_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000)</fill_query>
<fill_query>OPTIMIZE TABLE mt_{parts}_parts FINAL</fill_query>
<fill_query>INSERT INTO mt_20_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000)</fill_query>
<fill_query>INSERT INTO mt_200_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000)</fill_query>
<fill_query>OPTIMIZE TABLE mt_20_parts FINAL</fill_query>
<fill_query>OPTIMIZE TABLE mt_200_parts FINAL</fill_query>
<query>SELECT val2 FROM mt_{parts}_parts ORDER BY val1 LIMIT 10000 FORMAT Null</query>
<query>SELECT val2 FROM mt_{parts}_parts ORDER BY val1 LIMIT 100000 FORMAT Null</query>
<query>SELECT sum(val2) FROM mt_{parts}_parts GROUP BY val1 FORMAT Null</query>
<query>SELECT val2 FROM {table} ORDER BY val1 LIMIT 100 FORMAT Null</query>
<query>SELECT val2 FROM {table} ORDER BY val1 LIMIT 100000 FORMAT Null</query>
<query>SELECT sum(val2) FROM {table} GROUP BY val1 FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS {table}</drop_query>
<drop_query>DROP TABLE IF EXISTS mt_{parts}_parts</drop_query>
</test>
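Note: after this change the table name itself is derived from {parts}, so the two listed values produce two tables. One expansion, using the value 100 from above:

    CREATE TABLE mt_100_parts (id UInt32, val1 UInt32, val2 UInt32) ENGINE = MergeTree ORDER BY val1 PARTITION BY id % 100;
    INSERT INTO mt_100_parts SELECT number, rand() % 10000, rand() FROM numbers_mt(100000000);
    OPTIMIZE TABLE mt_100_parts FINAL;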

View File

@ -0,0 +1,10 @@
<test>
<preconditions>
<table_exists>hits_10m_single</table_exists>
</preconditions>
<query>SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID, exp(CounterID), sqrt(CounterID) FORMAT Null</query>
<query>SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID, EventDate, exp(CounterID), toDateTime(EventDate) FORMAT Null</query>
<query>SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC, EventDate DESC, exp(CounterID), toDateTime(EventDate) FORMAT Null</query>
</test>

View File

@ -1,22 +1,19 @@
<test>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(round(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(roundBankers(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(floor(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(ceil(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(trunc(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(round(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(roundBankers(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(floor(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(ceil(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(trunc(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(round(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundBankers(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(floor(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(ceil(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(trunc(toInt64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(round(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundBankers(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(floor(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(ceil(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(trunc(toFloat64(number), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(round(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(roundBankers(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(floor(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(ceil(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(1000000) WHERE NOT ignore(trunc(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(round(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(roundBankers(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(floor(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(ceil(toDecimal128(number, 0), -2))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(trunc(toDecimal128(number, 0), -2))</query>
</test>

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff.