Merge branch 'master' into clang-tidy-8

Alexey Milovidov 2020-04-22 03:25:53 +03:00
commit 8fc59ef405
13 changed files with 640 additions and 28 deletions


@@ -176,6 +176,28 @@ function run_tests
     wait
 }
 
+function get_profiles_watchdog
+{
+    sleep 3000
+    echo "The trace collection did not finish in time." >> report-errors.rep
+    for pid in $(pgrep -f clickhouse)
+    do
+        gdb -p $pid --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
+    done
+    wait
+
+    for i in {1..10}
+    do
+        if ! pkill -f clickhouse
+        then
+            break
+        fi
+        sleep 1
+    done
+}
+
 function get_profiles
 {
     # Collect the profiles
@@ -446,10 +468,28 @@ case "$stage" in
     time run_tests ||:
     ;&
 "get_profiles")
+    # Getting profiles inexplicably hangs sometimes, so try to save some logs if
+    # this happens again. Give the servers 5 minutes to collect all info, then
+    # trace and kill. Start in a subshell, so that both functions don't interfere
+    # with each other's jobs through `wait`. Also make the subshell have its own
+    # process group, so that we can then kill it with all its child processes.
+    # Somehow it doesn't kill the children by itself when dying.
+    set -m
+    ( get_profiles_watchdog ) &
+    watchdog_pid=$!
+    set +m
+    # Check that the watchdog started OK.
+    kill -0 $watchdog_pid
+
     # If the tests fail with OOM or something, still try to restart the servers
     # to collect the logs. Prefer not to restart, because addresses might change
-    # and we won't be able to process trace_log data.
-    time get_profiles || restart || get_profiles ||:
+    # and we won't be able to process trace_log data. Start in a subshell, so that
+    # it doesn't interfere with the watchdog through `wait`.
+    ( time get_profiles || restart || get_profiles ||: )
+
+    # Kill the whole process group, because somehow when the subshell is killed,
+    # the sleep inside remains alive and orphaned.
+    while env kill -- -$watchdog_pid ; do sleep 1; done
 
     # Stop the servers to free memory for the subsequent query analysis.
     while killall clickhouse; do echo . ; sleep 1 ; done
@@ -465,3 +505,7 @@ case "$stage" in
     time "$script_dir/report.py" > report.html
     ;&
 esac
+
+# Print some final debug info to help debug Weirdness, of which there is plenty.
+jobs
+pstree -apgT
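The subtle part of this hunk is the process-group handling: `set -m` makes the backgrounded subshell a group leader, and `kill -- -$watchdog_pid` signals the whole group, so the orphaned `sleep` dies with it. A minimal sketch of the same mechanism in POSIX C++ (illustration only, not part of the patch; the child command is an arbitrary stand-in):

#include <csignal>
#include <cstdio>
#include <sys/wait.h>
#include <unistd.h>

int main()
{
    pid_t watchdog = fork();
    if (watchdog == 0)
    {
        setpgid(0, 0);                   // child: become leader of a new process group
        execlp("sleep", "sleep", "3000", (char *) nullptr);
        _exit(127);                      // only reached if exec fails
    }
    setpgid(watchdog, watchdog);         // parent sets it too, avoiding a startup race

    /* ... the real work (get_profiles) would run here ... */

    kill(-watchdog, SIGTERM);            // negative pid: signal every process in the group
    waitpid(watchdog, nullptr, 0);       // reap the leader so it doesn't linger as a zombie
    puts("watchdog group terminated");
    return 0;
}

Without the group, killing the subshell alone would leave `sleep 3000` running, which is exactly the orphan the comment in the hunk describes.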


@@ -70,7 +70,9 @@ SelectStreamFactory::SelectStreamFactory(
 namespace
 {
 
-Pipe createLocalStream(const ASTPtr & query_ast, const Block & header, const Context & context, QueryProcessingStage::Enum processed_stage, bool force_tree_shaped_pipeline)
+Pipe createLocalStream(
+    const ASTPtr & query_ast, const Block & header, const Context & context, QueryProcessingStage::Enum processed_stage,
+    bool add_totals_port, bool add_extremes_port, bool force_tree_shaped_pipeline)
 {
     checkStackSize();
@@ -83,12 +85,10 @@ Pipe createLocalStream(const ASTPtr & query_ast, const Block & header, const Con
     auto stream = interpreter.execute().in;
     auto source = std::make_shared<SourceFromInputStream>(std::move(stream));
 
-    bool add_totals_and_extremes_port = processed_stage == QueryProcessingStage::Complete;
-
-    if (add_totals_and_extremes_port)
-    {
+    if (add_totals_port)
         source->addTotalsPort();
+    if (add_extremes_port)
         source->addExtremesPort();
-    }
 
     Pipe pipe(std::move(source));
@@ -138,7 +138,13 @@ void SelectStreamFactory::createForShard(
     Pipes & res)
 {
     bool force_add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
-    bool add_totals_and_extremes_port = processed_stage == QueryProcessingStage::Complete;
+    bool add_totals_port = false;
+    bool add_extremes_port = false;
+    if (processed_stage == QueryProcessingStage::Complete)
+    {
+        add_totals_port = query_ast->as<ASTSelectQuery &>().group_by_with_totals;
+        add_extremes_port = context.getSettingsRef().extremes;
+    }
 
     auto modified_query_ast = query_ast->clone();
     if (has_virtual_shard_num_column)
@@ -146,7 +152,8 @@ void SelectStreamFactory::createForShard(
     auto emplace_local_stream = [&]()
     {
-        res.emplace_back(createLocalStream(modified_query_ast, header, context, processed_stage, query_info.force_tree_shaped_pipeline));
+        res.emplace_back(createLocalStream(modified_query_ast, header, context, processed_stage,
+            add_totals_port, add_extremes_port, query_info.force_tree_shaped_pipeline));
     };
 
     String modified_query = formattedAST(modified_query_ast);
@@ -161,11 +168,10 @@ void SelectStreamFactory::createForShard(
         auto source = std::make_shared<SourceFromInputStream>(std::move(stream), force_add_agg_info);
 
-        if (add_totals_and_extremes_port)
-        {
+        if (add_totals_port)
             source->addTotalsPort();
+        if (add_extremes_port)
             source->addExtremesPort();
-        }
 
         res.emplace_back(std::move(source));
     };
@@ -265,7 +271,7 @@ void SelectStreamFactory::createForShard(
         auto lazily_create_stream = [
             pool = shard_info.pool, shard_num = shard_info.shard_num, modified_query, header = header, modified_query_ast, context, throttler,
             main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables,
-            stage = processed_stage, local_delay]()
+            stage = processed_stage, local_delay, add_totals_port, add_extremes_port]()
             -> BlockInputStreamPtr
         {
             auto current_settings = context.getSettingsRef();
@@ -298,7 +304,8 @@ void SelectStreamFactory::createForShard(
             }
 
             if (try_results.empty() || local_delay < max_remote_delay)
-                return std::make_shared<TreeExecutorBlockInputStream>(createLocalStream(modified_query_ast, header, context, stage, true));
+                return std::make_shared<TreeExecutorBlockInputStream>(
+                    createLocalStream(modified_query_ast, header, context, stage, add_totals_port, add_extremes_port, true));
             else
             {
                 std::vector<IConnectionPool::Entry> connections;
@@ -314,11 +321,10 @@ void SelectStreamFactory::createForShard(
         auto lazy_stream = std::make_shared<LazyBlockInputStream>("LazyShardWithLocalReplica", header, lazily_create_stream);
         auto source = std::make_shared<SourceFromInputStream>(std::move(lazy_stream), force_add_agg_info);
 
-        if (add_totals_and_extremes_port)
-        {
+        if (add_totals_port)
             source->addTotalsPort();
+        if (add_extremes_port)
             source->addExtremesPort();
-        }
 
         res.emplace_back(std::move(source));
     }
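After this change the totals port is attached only when the query actually has `GROUP BY ... WITH TOTALS`, and the extremes port only when the `extremes` setting is on, instead of both being forced at the `Complete` stage. The same two-line gate now appears at three call sites; a small helper could keep them in sync. A sketch with assumed names (`SourceT` stands in for `SourceFromInputStream`; this helper is not part of the patch):

// Attach the optional output ports only when the query will actually produce them.
template <typename SourceT>
void addOptionalPorts(SourceT & source, bool add_totals_port, bool add_extremes_port)
{
    if (add_totals_port)
        source.addTotalsPort();
    if (add_extremes_port)
        source.addExtremesPort();
}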


@@ -1,4 +1,5 @@
 #include <any>
+#include <limits>
 
 #include <common/logger_useful.h>
@@ -647,6 +648,11 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
     if (overDictionary())
         throw Exception("Logical error: insert into hash-map in HashJoin over dictionary", ErrorCodes::LOGICAL_ERROR);
 
+    /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency.
+    /// It's possible to split bigger blocks and insert them by parts here. But it would be dead code.
+    if (unlikely(source_block.rows() > std::numeric_limits<RowRef::SizeT>::max()))
+        throw Exception("Too many rows in right table block for HashJoin: " + toString(source_block.rows()), ErrorCodes::NOT_IMPLEMENTED);
+
     /// There's no optimization for right side const columns. Remove constness if any.
     Block block = materializeBlock(source_block);
     size_t rows = block.rows();
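Since `RowRef::SizeT` is `uint32_t`, a right-side block with more than 2^32 - 1 rows cannot be addressed by a row reference, so the new guard rejects it up front instead of letting the row number wrap around. The check in isolation, as a standalone sketch in plain standard C++ (not the ClickHouse API):

#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>

// Verify that a block's row count fits into the narrow index type before any
// row number is narrowed; throwing beats silent truncation.
std::uint32_t checkedRowCount(std::size_t rows)
{
    if (rows > std::numeric_limits<std::uint32_t>::max())
        throw std::runtime_error("Too many rows in right table block: " + std::to_string(rows));
    return static_cast<std::uint32_t>(rows);
}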


@@ -1713,7 +1713,8 @@ void InterpreterSelectQuery::executeAggregation(QueryPipeline & pipeline, const
     auto transform_params = std::make_shared<AggregatingTransformParams>(params, final);
 
-    pipeline.dropTotalsIfHas();
+    /// Forget about current totals and extremes. They will be calculated again after aggregation if needed.
+    pipeline.dropTotalsAndExtremes();
 
     /// If there are several sources, then we perform parallel aggregation
     if (pipeline.getNumStreams() > 1)


@@ -18,8 +18,10 @@ class Block;
 /// Reference to the row in block.
 struct RowRef
 {
+    using SizeT = uint32_t; /// Do not use size_t, because of memory economy
+
     const Block * block = nullptr;
-    size_t row_num = 0;
+    SizeT row_num = 0;
 
     RowRef() {}
     RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {}
@@ -33,7 +35,7 @@ struct RowRefList : RowRef
     {
         static constexpr size_t MAX_SIZE = 7; /// Adequate values are 3, 7, 15, 31.
 
-        size_t size = 0;
+        SizeT size = 0; /// It's smaller than size_t but keeps alignment in Arena.
         Batch * next;
         RowRef row_refs[MAX_SIZE];
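Note that on a 64-bit target the narrower `row_num` does not shrink `RowRef` by itself, because the struct is padded back to pointer alignment; the economy shows up when another 4-byte field can occupy those padding bytes, as in hash table cells that pack a count or key next to the row number. An illustrative sketch (assumes an LP64 platform with 8-byte pointers):

#include <cstdint>

struct WithSizeT { const void * block; std::uint64_t row_num; };                      // 16 bytes
struct WithU32   { const void * block; std::uint32_t row_num; };                      // still 16: tail padding
struct Packed    { const void * block; std::uint32_t row_num; std::uint32_t count; }; // 16: count rides in the padding

static_assert(sizeof(WithSizeT) == 16, "LP64 assumption");
static_assert(sizeof(WithU32)   == 16, "narrowing alone saves nothing here");
static_assert(sizeof(Packed)    == 16, "a second 4-byte field now costs nothing");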


@@ -401,18 +401,24 @@ void QueryPipeline::addTotals(ProcessorPtr source)
     assertBlocksHaveEqualStructure(current_header, source->getOutputs().front().getHeader(), "QueryPipeline");
 
     totals_having_port = &source->getOutputs().front();
-    processors.emplace_back(source);
+    processors.emplace_back(std::move(source));
 }
 
-void QueryPipeline::dropTotalsIfHas()
+void QueryPipeline::dropTotalsAndExtremes()
 {
-    if (totals_having_port)
+    auto drop_port = [&](OutputPort *& port)
     {
-        auto null_sink = std::make_shared<NullSink>(totals_having_port->getHeader());
-        connect(*totals_having_port, null_sink->getPort());
+        auto null_sink = std::make_shared<NullSink>(port->getHeader());
+        connect(*port, null_sink->getPort());
         processors.emplace_back(std::move(null_sink));
-        totals_having_port = nullptr;
-    }
+        port = nullptr;
+    };
+
+    if (totals_having_port)
+        drop_port(totals_having_port);
+
+    if (extremes_port)
+        drop_port(extremes_port);
 }
 
 void QueryPipeline::addExtremesTransform()
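The `drop_port` lambda cannot simply null the pointer: the executor expects every live output port to have a consumer, so the port is first wired into a `NullSink` that discards its data (and is kept alive in `processors`), and only then forgotten. A toy model of the pattern with stand-in types (not the real Processors API):

#include <memory>
#include <vector>

struct OutputPort { bool connected = false; };

struct NullSink                       // consumes and discards whatever arrives
{
    OutputPort * input = nullptr;
};

void connect(OutputPort & out, NullSink & sink)
{
    out.connected = true;             // the port now has a consumer and won't stall execution
    sink.input = &out;
}

void dropPort(OutputPort *& port, std::vector<std::shared_ptr<NullSink>> & processors)
{
    auto null_sink = std::make_shared<NullSink>();
    connect(*port, *null_sink);
    processors.emplace_back(std::move(null_sink)); // keep the sink alive with the pipeline
    port = nullptr;                                // the pipeline no longer exposes this port
}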


@@ -109,7 +109,8 @@ public:
     /// Add already calculated totals.
     void addTotals(ProcessorPtr source);
 
-    void dropTotalsIfHas();
+    /// Forget about current totals and extremes. It is needed before aggregation, because they will be calculated again.
+    void dropTotalsAndExtremes();
 
     /// Will read from this stream after all data was read from other streams.
     void addDelayedStream(ProcessorPtr source);


@@ -0,0 +1,289 @@
20
20
20
distributed_group_by_no_merge = 0, extremes = 0
10
-
10
-
10
10
-
10
10
------
10
-
10
-
20
-
20
------
10
10
-
10
10
-
20
20
-
20
20
distributed_group_by_no_merge = 1, extremes = 0
10
-
10
-
10
10
-
10
10
------
10
-
10
-
20
-
20
------
10
10
-
10
10
-
20
20
-
20
20
distributed_group_by_no_merge = 0, extremes = 1
10
10
10
-
-
-
10
10
10
-
-
-
10
10
10
10
-
-
-
10
10
10
10
------
------
------
10
10
10
-
-
-
10
10
10
-
-
-
20
20
20
-
-
-
20
20
20
------
------
------
10
10
10
10
-
-
-
10
10
10
10
-
-
-
20
20
20
20
-
-
-
20
20
20
20
distributed_group_by_no_merge = 1, extremes = 1
distributed_group_by_no_merge = 1, extremes = 1
distributed_group_by_no_merge = 1, extremes = 1
10
10
10
-
-
-
10
10
10
-
-
-
10
10
10
10
-
-
-
10
10
10
10
------
------
------
10
10
10
-
-
-
10
10
10
-
-
-
20
20
20
-
-
-
20
20
20
------
------
------
10
10
10
10
-
-
-
10
10
10
10
-
-
-
20
20
20
20
-
-
-
20
20
20
20


@@ -0,0 +1,106 @@
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 5 SETTINGS distributed_group_by_no_merge = 1);
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 5 SETTINGS distributed_group_by_no_merge = 1) with totals;
SELECT 'distributed_group_by_no_merge = 0, extremes = 0';
SET distributed_group_by_no_merge = 0, extremes = 0;
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5) with totals;
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5) with totals;
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5)) with totals);
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5))) with totals;
SELECT 'distributed_group_by_no_merge = 1, extremes = 0';
SET distributed_group_by_no_merge = 1, extremes = 0;
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5) with totals;
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5) with totals;
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5)) with totals);
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5))) with totals;
SELECT 'distributed_group_by_no_merge = 0, extremes = 1';
SET distributed_group_by_no_merge = 0, extremes = 1;
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5) with totals;
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5) with totals;
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5)) with totals);
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5))) with totals;
SELECT 'distributed_group_by_no_merge = 1, extremes = 1';
SET distributed_group_by_no_merge = 1, extremes = 1;
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5);
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 5) with totals;
SELECT '-';
SELECT sum(number) FROM (SELECT * FROM remote('127.0.0.2', system.numbers) LIMIT 5) with totals;
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5)) with totals);
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5)) with totals);
SELECT '------';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.1', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.2', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{1,2}', numbers(5))) with totals;
SELECT '-';
SELECT sum(s) FROM (SELECT sum(number) as s FROM remote('127.0.0.{2,3}', numbers(5))) with totals;


@@ -0,0 +1,9 @@
CREATE DICTIONARY testip
(
`network` String,
`test_field` String
)
PRIMARY KEY network
SOURCE(FILE(PATH '/tmp/test.csv' FORMAT CSVWithNames))
LIFETIME(MIN 0 MAX 300)
LAYOUT(IPTRIE()); -- { serverError 137 }


@@ -0,0 +1,31 @@
0 300 0000-00-00 00:00:00 N1
0 300 0000-00-00 00:00:00 N1
0 300 0000-00-00 00:00:00 N1
0 300 0000-00-00 00:00:00 N1
0 300 0000-00-00 00:00:00 N1
0 300 0000-00-00 00:00:00 N1
0 200 0000-00-00 00:00:00 C2
0 0 0000-00-00 00:00:00 C3
0 0 0000-00-00 00:00:00 C3
0 0 0000-00-00 00:00:00 N1
0 0 0000-00-00 00:00:00 N1
0 0 0000-00-00 00:00:00 N1
0 0 0000-00-00 00:00:00 N1
0 0 0000-00-00 00:00:00 N1
0 210 2020-02-16 05:22:04 N1
0 210 2020-02-16 05:22:04 N1
0 270 2020-02-16 05:22:04 N1
0 270 2020-02-16 05:22:04 N1
0 380 0000-00-00 00:00:00 N1
0 380 0000-00-00 00:00:00 N1
0 280 0000-00-00 00:00:00 C2
0 0 0000-00-00 00:00:00 N1
0 190 2020-02-16 05:22:05 N1
0 160 2020-02-14 05:22:13 N1
0 230 2020-02-14 05:22:13 N1
0 130 2020-02-14 05:22:14 N1
0 300 0000-00-00 00:00:00 N1
0 300 0000-00-00 00:00:00 N1
0 0 0000-00-00 00:00:00 C2
0 0 0000-00-00 00:00:00 N1
0 0 0000-00-00 00:00:00 N1

File diff suppressed because one or more lines are too long