mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Fix stupid error
This commit is contained in:
parent
8ccaa6ede9
commit
184dbedb06
@ -183,8 +183,8 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm
|
||||
/// adjust last mark rows and flush to disk.
|
||||
if (rows_written_in_last_mark >= index_granularity_for_block)
|
||||
adjustLastMarkIfNeedAndFlushToDisk(rows_written_in_last_mark);
|
||||
else /// We still can write some rows from new block into previous granule.
|
||||
adjustLastMarkIfNeedAndFlushToDisk(index_granularity_for_block - rows_written_in_last_mark);
|
||||
else /// We still can write some rows from new block into previous granule. So the granule size will be block granularity size.
|
||||
adjustLastMarkIfNeedAndFlushToDisk(index_granularity_for_block);
|
||||
}
|
||||
}
|
||||
|
||||
@ -614,6 +614,11 @@ void MergeTreeDataPartWriterWide::fillIndexGranularity(size_t index_granularity_
|
||||
|
||||
void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_rows_in_last_mark)
|
||||
{
|
||||
/// We don't want to split already written granules into smaller ones
|
||||
if (rows_written_in_last_mark > new_rows_in_last_mark)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Tryin to make mark #{} smaller ({} rows) then it already has {}",
|
||||
getCurrentMark(), new_rows_in_last_mark, rows_written_in_last_mark);
|
||||
|
||||
/// We can adjust marks only if we computed granularity for blocks.
|
||||
/// Otherwise we cannot change granularity because it will differ from
|
||||
/// other columns
|
||||
|
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import string
|
||||
|
||||
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
||||
|
||||
from pure_http_client import ClickHouseClient
|
||||
|
||||
def get_random_string(length):
    """Return a random string of ``length`` uppercase ASCII letters and digits.

    The alphabet is built once and all characters are drawn in a single
    ``random.choices`` call instead of re-concatenating the alphabet for
    every generated character.

    A ``length`` of 0 yields an empty string.
    """
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choices(alphabet, k=length))
|
||||
|
||||
client = ClickHouseClient()
|
||||
|
||||
def insert_block(table_name, block_granularity_rows, block_rows,
                 index_granularity_bytes=10 * 1024 * 1024):
    """Insert ``block_rows`` rows of random data into ``table_name`` with one INSERT.

    Every row has the form ``(1, '<random string>')``. The string length is
    ``index_granularity_bytes // block_granularity_rows - 1``, so that
    ``block_granularity_rows`` such rows add up to roughly
    ``index_granularity_bytes`` bytes.
    NOTE(review): presumably this makes the server pick a granule of about
    ``block_granularity_rows`` rows for the block -- confirm against the
    table's adaptive granularity settings.

    Parameters:
        table_name: name of the target table, interpolated into the query as is.
        block_granularity_rows: number of rows that should add up to
            ``index_granularity_bytes``; must be non-zero.
        block_rows: number of rows actually inserted.
        index_granularity_bytes: byte budget used to derive the row size;
            defaults to 10 MiB, the value that used to be hard-coded here.
    """
    row_bytes = index_granularity_bytes // block_granularity_rows
    # The payload only contains [A-Z0-9], so it is safe to embed in quotes.
    values_row = ", ".join(
        "(1, '" + get_random_string(row_bytes - 1) + "')"
        for _ in range(block_rows)
    )
    # `client` is the module-level ClickHouseClient; it is only read here,
    # so no `global` declaration is needed.
    client.query("INSERT INTO {} VALUES {}".format(table_name, values_row))
|
||||
|
||||
try:
|
||||
client.query("DROP TABLE IF EXISTS t")
|
||||
client.query("CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0")
|
||||
|
||||
client.query("SYSTEM STOP MERGES t")
|
||||
|
||||
# These block sizes are taken from a real table which reproduces the error
|
||||
# when we get a granule with more rows than the fixed granularity after a horizontal merge.
|
||||
# About 10k rows when the max is 8192.
|
||||
#
|
||||
# Why are these blocks special?
|
||||
# 1) The first one contains 1811 rows, but its granule should have 6853.
|
||||
# So we write 1811 and get unfinished granule with 6853 - 1811 = 5042 rows to write from the next blocks.
|
||||
#
|
||||
# 2) The second block has fewer rows than rows left in the unfinished granule (3094 < 5042).
|
||||
# It can be written entirely in this unfinished granule and we will still have some rows left. But its granularity
|
||||
# should be smaller than rows left in granule (3094 < 5042), so clickhouse will adjust (make smaller) this last unfinished granule.
|
||||
# This adjustment logic contained a bug: we adjusted not to the new block's granularity (3094), but to the difference between the new block granularity and
|
||||
# already written rows (3094 - 1811 = 1283). This led to several unsigned integer overflows in the code and huge granules as a result.
|
||||
#
|
||||
# 3) Last block just triggers the check that each granule has fewer rows than fixed granularity rows. If the bug from 2) exists then it will fail.
|
||||
insert_block("t", block_granularity_rows=6853, block_rows=1811)
|
||||
insert_block("t", block_granularity_rows=3094, block_rows=3094)
|
||||
insert_block("t", block_granularity_rows=6092, block_rows=6092)
|
||||
|
||||
client.query("SYSTEM START MERGES t")
|
||||
client.query("OPTIMIZE TABLE t FINAL")
|
||||
|
||||
print(client.query_return_df("SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames")['C'][0])
|
||||
finally:
|
||||
client.query("DROP TABLE IF EXISTS t")
|
@ -0,0 +1 @@
|
||||
10997
|
7
tests/queries/0_stateless/01654_test_writer_block_sequence.sh
Executable file
7
tests/queries/0_stateless/01654_test_writer_block_sequence.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
python3 "$CURDIR"/01654_test_writer_block_sequence.python
|
Loading…
Reference in New Issue
Block a user