mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 01:00:48 +00:00
Add test for OPTIMIZE FINAL and more diagnostics. [#CLICKHOUSE-2886]
This commit is contained in:
parent
11218b9ba5
commit
74a92cb8d9
@ -23,11 +23,24 @@ ColumnGathererStream::ColumnGathererStream(const BlockInputStreams & source_stre
|
|||||||
throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED);
|
throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED);
|
||||||
|
|
||||||
children.assign(source_streams.begin(), source_streams.end());
|
children.assign(source_streams.begin(), source_streams.end());
|
||||||
|
}
|
||||||
|
|
||||||
/// Trivial case
|
|
||||||
if (children.size() == 1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
|
String ColumnGathererStream::getID() const
|
||||||
|
{
|
||||||
|
std::stringstream res;
|
||||||
|
|
||||||
|
res << getName() << "(";
|
||||||
|
for (size_t i = 0; i < children.size(); i++)
|
||||||
|
res << (i == 0 ? "" : ", " ) << children[i]->getID();
|
||||||
|
res << ")";
|
||||||
|
|
||||||
|
return res.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ColumnGathererStream::init()
|
||||||
|
{
|
||||||
sources.reserve(children.size());
|
sources.reserve(children.size());
|
||||||
for (size_t i = 0; i < children.size(); i++)
|
for (size_t i = 0; i < children.size(); i++)
|
||||||
{
|
{
|
||||||
@ -52,24 +65,16 @@ ColumnGathererStream::ColumnGathererStream(const BlockInputStreams & source_stre
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
String ColumnGathererStream::getID() const
|
|
||||||
{
|
|
||||||
std::stringstream res;
|
|
||||||
|
|
||||||
res << getName() << "(";
|
|
||||||
for (size_t i = 0; i < children.size(); i++)
|
|
||||||
res << (i == 0 ? "" : ", " ) << children[i]->getID();
|
|
||||||
res << ")";
|
|
||||||
|
|
||||||
return res.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Block ColumnGathererStream::readImpl()
|
Block ColumnGathererStream::readImpl()
|
||||||
{
|
{
|
||||||
if (children.size() == 1)
|
/// Special case: single source and there are no skipped rows
|
||||||
|
if (children.size() == 1 && row_source.size() == 0)
|
||||||
return children[0]->read();
|
return children[0]->read();
|
||||||
|
|
||||||
|
/// Initialize first source blocks
|
||||||
|
if (sources.empty())
|
||||||
|
init();
|
||||||
|
|
||||||
if (pos_global_start >= row_source.size())
|
if (pos_global_start >= row_source.size())
|
||||||
return Block();
|
return Block();
|
||||||
|
|
||||||
@ -154,13 +159,10 @@ void ColumnGathererStream::fetchNewBlock(Source & source, size_t source_num)
|
|||||||
|
|
||||||
void ColumnGathererStream::readSuffixImpl()
|
void ColumnGathererStream::readSuffixImpl()
|
||||||
{
|
{
|
||||||
if (children.size() == 1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
const BlockStreamProfileInfo & profile_info = getProfileInfo();
|
const BlockStreamProfileInfo & profile_info = getProfileInfo();
|
||||||
double seconds = profile_info.total_stopwatch.elapsedSeconds();
|
double seconds = profile_info.total_stopwatch.elapsedSeconds();
|
||||||
LOG_DEBUG(log, std::fixed << std::setprecision(2)
|
LOG_DEBUG(log, std::fixed << std::setprecision(2)
|
||||||
<< "Gathered column " << column.name << " " << column.type->getName()
|
<< "Gathered column " << name
|
||||||
<< " (" << static_cast<double>(profile_info.bytes) / profile_info.rows << " bytes/elem.)"
|
<< " (" << static_cast<double>(profile_info.bytes) / profile_info.rows << " bytes/elem.)"
|
||||||
<< " in " << seconds << " sec., "
|
<< " in " << seconds << " sec., "
|
||||||
<< profile_info.rows / seconds << " rows/sec., "
|
<< profile_info.rows / seconds << " rows/sec., "
|
||||||
|
@ -97,6 +97,7 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void init();
|
||||||
void fetchNewBlock(Source & source, size_t source_num);
|
void fetchNewBlock(Source & source, size_t source_num);
|
||||||
|
|
||||||
std::vector<Source> sources;
|
std::vector<Source> sources;
|
||||||
|
@ -21,6 +21,8 @@ MergingSortedBlockInputStream::MergingSortedBlockInputStream(BlockInputStreams &
|
|||||||
source_blocks(inputs_.size()), cursors(inputs_.size()), out_row_sources(out_row_sources_)
|
source_blocks(inputs_.size()), cursors(inputs_.size()), out_row_sources(out_row_sources_)
|
||||||
{
|
{
|
||||||
children.insert(children.end(), inputs_.begin(), inputs_.end());
|
children.insert(children.end(), inputs_.begin(), inputs_.end());
|
||||||
|
if (out_row_sources)
|
||||||
|
out_row_sources->clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
String MergingSortedBlockInputStream::getID() const
|
String MergingSortedBlockInputStream::getID() const
|
||||||
|
@ -681,15 +681,23 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart
|
|||||||
|
|
||||||
ColumnGathererStream column_gathered_stream(column_part_streams, column_name, merged_rows_sources, DEFAULT_BLOCK_SIZE);
|
ColumnGathererStream column_gathered_stream(column_part_streams, column_name, merged_rows_sources, DEFAULT_BLOCK_SIZE);
|
||||||
MergedColumnOnlyOutputStream column_to(data, new_part_tmp_path, false, compression_method, offset_written);
|
MergedColumnOnlyOutputStream column_to(data, new_part_tmp_path, false, compression_method, offset_written);
|
||||||
|
size_t column_elems_written = 0;
|
||||||
|
|
||||||
column_to.writePrefix();
|
column_to.writePrefix();
|
||||||
while ((block = column_gathered_stream.read()))
|
while ((block = column_gathered_stream.read()))
|
||||||
{
|
{
|
||||||
|
column_elems_written += block.rows();
|
||||||
column_to.write(block);
|
column_to.write(block);
|
||||||
}
|
}
|
||||||
column_gathered_stream.readSuffix();
|
column_gathered_stream.readSuffix();
|
||||||
checksums_gathered_columns.add(column_to.writeSuffixAndGetChecksums());
|
checksums_gathered_columns.add(column_to.writeSuffixAndGetChecksums());
|
||||||
|
|
||||||
|
if (rows_written != column_elems_written)
|
||||||
|
{
|
||||||
|
throw Exception("Written " + toString(column_elems_written) + " elements of column " + column_name +
|
||||||
|
", but " + toString(rows_written) + " rows of PK columns", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
if (typeid_cast<const DataTypeArray *>(column_type.get()))
|
if (typeid_cast<const DataTypeArray *>(column_type.get()))
|
||||||
offset_columns_written.emplace(offset_column_name);
|
offset_columns_written.emplace(offset_column_name);
|
||||||
|
|
||||||
|
@ -33,5 +33,3 @@ target_link_libraries (merge_selector dbms)
|
|||||||
|
|
||||||
add_executable (merge_selector2 merge_selector2.cpp)
|
add_executable (merge_selector2 merge_selector2.cpp)
|
||||||
target_link_libraries (merge_selector2 dbms)
|
target_link_libraries (merge_selector2 dbms)
|
||||||
|
|
||||||
add_executable (row_source_bitwise_compatibility row_source_bitwise_test.cpp)
|
|
||||||
|
@ -1,17 +1,13 @@
|
|||||||
#include <cstdlib>
|
#include <gtest/gtest.h>
|
||||||
#include <DataStreams/ColumnGathererStream.h>
|
#include <DataStreams/ColumnGathererStream.h>
|
||||||
using DB::RowSourcePart;
|
using DB::RowSourcePart;
|
||||||
|
|
||||||
static void check(const RowSourcePart & s, size_t num, bool flag)
|
static void check(const RowSourcePart & s, size_t num, bool flag)
|
||||||
{
|
{
|
||||||
if ((s.getSourceNum() != num || s.getSkipFlag() != flag) || (!flag && s.getData() != num))
|
EXPECT_FALSE((s.getSourceNum() != num || s.getSkipFlag() != flag) || (!flag && s.getData() != num));
|
||||||
{
|
|
||||||
printf("FAIL");
|
|
||||||
std::exit(-1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int, char **)
|
TEST(ColumnGathererStream, RowSourcePartBitsTest)
|
||||||
{
|
{
|
||||||
check(RowSourcePart(0, false), 0, false);
|
check(RowSourcePart(0, false), 0, false);
|
||||||
check(RowSourcePart(0, true), 0, true);
|
check(RowSourcePart(0, true), 0, true);
|
||||||
@ -28,7 +24,4 @@ int main(int, char **)
|
|||||||
check(p, 80, false);
|
check(p, 80, false);
|
||||||
p.setSourceNum(RowSourcePart::MAX_PARTS);
|
p.setSourceNum(RowSourcePart::MAX_PARTS);
|
||||||
check(p, RowSourcePart::MAX_PARTS, false);
|
check(p, RowSourcePart::MAX_PARTS, false);
|
||||||
|
|
||||||
printf("PASSED");
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
@ -0,0 +1,3 @@
|
|||||||
|
1500000 1500000 1500000 1500000 1500000 1500000
|
||||||
|
[['def']] [['','']]
|
||||||
|
0
|
86
dbms/tests/queries/0_stateless/00442_optimize_final_vertical_merge.sh
Executable file
86
dbms/tests/queries/0_stateless/00442_optimize_final_vertical_merge.sh
Executable file
@ -0,0 +1,86 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
db="test"
|
||||||
|
table="optimize_me_finally"
|
||||||
|
name="$db.$table"
|
||||||
|
res_rows=1500000 # >= vertical_merge_algorithm_min_rows_to_activate
|
||||||
|
|
||||||
|
function get_num_parts {
|
||||||
|
clickhouse-client -q "SELECT count() FROM system.parts WHERE active AND database='$db' AND table='$table'"
|
||||||
|
}
|
||||||
|
|
||||||
|
clickhouse-client -q "DROP TABLE IF EXISTS $name"
|
||||||
|
|
||||||
|
clickhouse-client -q "CREATE TABLE $name (
|
||||||
|
date Date,
|
||||||
|
Sign Int8,
|
||||||
|
ki UInt64,
|
||||||
|
|
||||||
|
ds String,
|
||||||
|
di01 UInt64,
|
||||||
|
di02 UInt64,
|
||||||
|
di03 UInt64,
|
||||||
|
di04 UInt64,
|
||||||
|
di05 UInt64,
|
||||||
|
di06 UInt64,
|
||||||
|
di07 UInt64,
|
||||||
|
di08 UInt64,
|
||||||
|
di09 UInt64,
|
||||||
|
di10 UInt64,
|
||||||
|
n Nested(
|
||||||
|
i UInt64,
|
||||||
|
s String
|
||||||
|
)
|
||||||
|
)
|
||||||
|
ENGINE = CollapsingMergeTree(date, (date, ki), 8192, Sign)"
|
||||||
|
|
||||||
|
clickhouse-client -q "INSERT INTO $name (date, Sign, ki) SELECT
|
||||||
|
toDate(0) AS date,
|
||||||
|
toInt8(1) AS Sign,
|
||||||
|
toUInt64(0) AS ki
|
||||||
|
FROM system.numbers LIMIT 9000"
|
||||||
|
|
||||||
|
clickhouse-client -q "INSERT INTO $name (date, Sign, ki) SELECT
|
||||||
|
toDate(0) AS date,
|
||||||
|
toInt8(1) AS Sign,
|
||||||
|
number AS ki
|
||||||
|
FROM system.numbers LIMIT 9000, 9000"
|
||||||
|
|
||||||
|
clickhouse-client -q "INSERT INTO $name SELECT
|
||||||
|
toDate(0) AS date,
|
||||||
|
toInt8(1) AS Sign,
|
||||||
|
number AS ki,
|
||||||
|
hex(number) AS ds,
|
||||||
|
number AS di01,
|
||||||
|
number AS di02,
|
||||||
|
number AS di03,
|
||||||
|
number AS di04,
|
||||||
|
number AS di05,
|
||||||
|
number AS di06,
|
||||||
|
number AS di07,
|
||||||
|
number AS di08,
|
||||||
|
number AS di09,
|
||||||
|
number AS di10,
|
||||||
|
[number, number+1] AS \`n.i\`,
|
||||||
|
[hex(number), hex(number+1)] AS \`n.s\`
|
||||||
|
FROM system.numbers LIMIT $res_rows"
|
||||||
|
|
||||||
|
while [[ `get_num_parts` -ne 1 ]] ; do clickhouse-client -q "OPTIMIZE TABLE $name PARTITION 197001"; done
|
||||||
|
|
||||||
|
clickhouse-client -q "ALTER TABLE $name ADD COLUMN n.a Array(String)"
|
||||||
|
clickhouse-client -q "ALTER TABLE $name ADD COLUMN da Array(String) DEFAULT ['def']"
|
||||||
|
|
||||||
|
clickhouse-client -q "OPTIMIZE TABLE $name PARTITION 197001 FINAL"
|
||||||
|
|
||||||
|
clickhouse-client -q "ALTER TABLE $name MODIFY COLUMN n.a Array(String) DEFAULT ['zzz']"
|
||||||
|
clickhouse-client -q "ALTER TABLE $name MODIFY COLUMN da Array(String) DEFAULT ['zzz']"
|
||||||
|
|
||||||
|
clickhouse-client -q "SELECT count(), sum(Sign), sum(ki = di05), sum(hex(ki) = ds), sum(ki = n.i[1]), sum([hex(ki), hex(ki+1)] = n.s) FROM $name"
|
||||||
|
clickhouse-client -q "SELECT groupUniqArray(da), groupUniqArray(n.a) FROM $name"
|
||||||
|
|
||||||
|
hash_src=`clickhouse-client --max_threads=1 -q "SELECT cityHash64(groupArray(ki)) FROM $name"`
|
||||||
|
hash_ref=`clickhouse-client --max_threads=1 -q "SELECT cityHash64(groupArray(ki)) FROM (SELECT number as ki FROM system.numbers LIMIT $res_rows)"`
|
||||||
|
echo $(( $hash_src - $hash_ref ))
|
||||||
|
|
||||||
|
clickhouse-client -q "DROP TABLE IF EXISTS $name"
|
Loading…
Reference in New Issue
Block a user