Fix SIGSEGV in SortedBlocksWriter in case of empty block

CI found one issue [1].

Here is the stack trace for invalid read:

<details>

<summary>stack trace</summary>

```
    0: DB::TemporaryFileLazySource::TemporaryFileLazySource(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block const&) [inlined] std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__is_long(this="") const at string:1445:22
    1: DB::TemporaryFileLazySource::TemporaryFileLazySource(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block const&) [inlined] std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::basic_string(this="", __str="") at string:1927
    2: DB::TemporaryFileLazySource::TemporaryFileLazySource(this=0x00007f3aec105f58, path_="", header_=0x00007f38ffd93b40) at TemporaryFileLazySource.cpp:11
    3: DB::SortedBlocksWriter::streamFromFile(std::__1::unique_ptr<Poco::TemporaryFile, std::__1::default_delete<Poco::TemporaryFile> > const&) const [inlined] DB::TemporaryFileLazySource* std::__1::construct_at<DB::TemporaryFileLazySource, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block, DB::TemporaryFileLazySource*>(__args=0x00007f38ffd91560) at construct_at.h:38:50
    4: DB::SortedBlocksWriter::streamFromFile(std::__1::unique_ptr<Poco::TemporaryFile, std::__1::default_delete<Poco::TemporaryFile> > const&) const [inlined] void std::__1::allocator_traits<std::__1::allocator<DB::TemporaryFileLazySource> >::construct<DB::TemporaryFileLazySource, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block, void, void>(__args=0x00007f38ffd91560) at allocator_traits.h:298
    5: DB::SortedBlocksWriter::streamFromFile(std::__1::unique_ptr<Poco::TemporaryFile, std::__1::default_delete<Poco::TemporaryFile> > const&) const [inlined] std::__1::__shared_ptr_emplace<DB::TemporaryFileLazySource, std::__1::allocator<DB::TemporaryFileLazySource> >::__shared_ptr_emplace<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block>(this=0x00007f3aec105f40, __args=0x00007f38ffd91560) at shared_ptr.h:293
    6: DB::SortedBlocksWriter::streamFromFile(std::__1::unique_ptr<Poco::TemporaryFile, std::__1::default_delete<Poco::TemporaryFile> > const&) const [inlined] std::__1::shared_ptr<DB::TemporaryFileLazySource> std::__1::allocate_shared<DB::TemporaryFileLazySource, std::__1::allocator<DB::TemporaryFileLazySource>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block, void>(__args=<unavailable>, __args=<unavailable>) at shared_ptr.h:954
    7: DB::SortedBlocksWriter::streamFromFile(std::__1::unique_ptr<Poco::TemporaryFile, std::__1::default_delete<Poco::TemporaryFile> > const&) const [inlined] std::__1::shared_ptr<DB::TemporaryFileLazySource> std::__1::make_shared<DB::TemporaryFileLazySource, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, DB::Block, void>(__args=<unavailable>, __args=<unavailable>) at shared_ptr.h:963
    8: DB::SortedBlocksWriter::streamFromFile(this=<unavailable>, file=<unavailable>) const at SortedBlocksWriter.cpp:238
    9: DB::SortedBlocksWriter::premerge(this=<unavailable>) at SortedBlocksWriter.cpp:209:32
```

</details>

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/41046/adea92f847373d1fcfd733d8979c63024f9b80bf/stress_test__asan_.html

So the problem here is that there was empty unique_ptr<> reference to
temporary file, because of empty block that accepted by
SortedBlocksWriter::insert(), but insert() is not a problem the problem
is premerge() that steals blocks from insert() and do not have check
that there are some rows. However this check exists in
SortedBlocksWriter::flush(), and in that case temporary file is not
created.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2022-09-09 19:23:45 +02:00
parent 57146c9361
commit 763bb18f98
3 changed files with 28 additions and 1 deletions

View File

@ -28,6 +28,11 @@ namespace CurrentMetrics
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
@ -84,6 +89,9 @@ void SortedBlocksWriter::insert(Block && block)
size_t bytes = 0;
size_t flush_no = 0;
if (!block.rows())
return;
{
std::lock_guard lock{insert_mutex};
@ -145,7 +153,7 @@ SortedBlocksWriter::TmpFilePtr SortedBlocksWriter::flush(const BlocksList & bloc
pipes.emplace_back(std::make_shared<SourceFromSingleChunk>(block.cloneEmpty(), Chunk(block.getColumns(), num_rows)));
if (pipes.empty())
return {};
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty block");
QueryPipelineBuilder pipeline;
pipeline.init(Pipe::unitePipes(std::move(pipes)));

View File

@ -0,0 +1,19 @@
-- Regression test when Join stores data on disk and receive empty block.
-- Because of this it does not create empty file, while expect it.
SET max_threads = 1;
SET join_algorithm = 'auto';
SET max_rows_in_join = 1000;
SET optimize_aggregation_in_order = 1;
SET max_block_size = 1000;
DROP TABLE IF EXISTS join_on_disk;
SYSTEM STOP MERGES join_on_disk;
CREATE TABLE join_on_disk (id Int) Engine=MergeTree() ORDER BY id;
INSERT INTO join_on_disk SELECT number as id FROM numbers_mt(50000);
INSERT INTO join_on_disk SELECT number as id FROM numbers_mt(1000);
SELECT id FROM join_on_disk lhs LEFT JOIN (SELECT id FROM join_on_disk GROUP BY id) rhs USING (id) FORMAT Null;