mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #12173 from nikitamikhaylov/summing-bug-fix
Exclude partition key columns from SummingMergeTree
This commit is contained in:
commit
ffdd6e1af6
@ -169,17 +169,6 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
String createCommaSeparatedStringFrom(const Names & names)
|
|
||||||
{
|
|
||||||
std::ostringstream ss;
|
|
||||||
if (!names.empty())
|
|
||||||
{
|
|
||||||
std::copy(names.begin(), std::prev(names.end()), std::ostream_iterator<std::string>(ss, ", "));
|
|
||||||
ss << names.back();
|
|
||||||
}
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
|
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
|
||||||
{
|
{
|
||||||
const auto sorting_key_ast = extractOrderBy(storage_ast);
|
const auto sorting_key_ast = extractOrderBy(storage_ast);
|
||||||
|
@ -40,6 +40,7 @@
|
|||||||
#include <Columns/ColumnString.h>
|
#include <Columns/ColumnString.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
#include <DataTypes/NestedUtils.h>
|
||||||
#include <Parsers/ParserCreateQuery.h>
|
#include <Parsers/ParserCreateQuery.h>
|
||||||
#include <Parsers/parseQuery.h>
|
#include <Parsers/parseQuery.h>
|
||||||
#include <Parsers/ParserQuery.h>
|
#include <Parsers/ParserQuery.h>
|
||||||
@ -197,8 +198,6 @@ ASTPtr extractPrimaryKey(const ASTPtr & storage_ast);
|
|||||||
|
|
||||||
ASTPtr extractOrderBy(const ASTPtr & storage_ast);
|
ASTPtr extractOrderBy(const ASTPtr & storage_ast);
|
||||||
|
|
||||||
String createCommaSeparatedStringFrom(const Names & names);
|
|
||||||
|
|
||||||
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast);
|
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast);
|
||||||
|
|
||||||
String extractReplicatedTableZookeeperPath(const ASTPtr & storage_ast);
|
String extractReplicatedTableZookeeperPath(const ASTPtr & storage_ast);
|
||||||
|
@ -268,7 +268,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
|
|||||||
ParserStorage parser_storage;
|
ParserStorage parser_storage;
|
||||||
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||||
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
|
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
|
||||||
primary_key_comma_separated = createCommaSeparatedStringFrom(extractPrimaryKeyColumnNames(engine_push_ast));
|
primary_key_comma_separated = Nested::createCommaSeparatedStringFrom(extractPrimaryKeyColumnNames(engine_push_ast));
|
||||||
engine_push_zk_path = extractReplicatedTableZookeeperPath(engine_push_ast);
|
engine_push_zk_path = extractReplicatedTableZookeeperPath(engine_push_ast);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,6 +70,17 @@ std::pair<std::string, std::string> splitName(const std::string & name)
|
|||||||
return {{ begin, first_end }, { second_begin, end }};
|
return {{ begin, first_end }, { second_begin, end }};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string createCommaSeparatedStringFrom(const Names & names)
|
||||||
|
{
|
||||||
|
std::ostringstream ss;
|
||||||
|
if (!names.empty())
|
||||||
|
{
|
||||||
|
std::copy(names.begin(), std::prev(names.end()), std::ostream_iterator<std::string>(ss, ", "));
|
||||||
|
ss << names.back();
|
||||||
|
}
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string extractTableName(const std::string & nested_name)
|
std::string extractTableName(const std::string & nested_name)
|
||||||
{
|
{
|
||||||
|
@ -13,6 +13,8 @@ namespace Nested
|
|||||||
|
|
||||||
std::pair<std::string, std::string> splitName(const std::string & name);
|
std::pair<std::string, std::string> splitName(const std::string & name);
|
||||||
|
|
||||||
|
std::string createCommaSeparatedStringFrom(const Names & names);
|
||||||
|
|
||||||
/// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot.
|
/// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot.
|
||||||
std::string extractTableName(const std::string & nested_name);
|
std::string extractTableName(const std::string & nested_name);
|
||||||
|
|
||||||
|
@ -94,6 +94,12 @@ static bool isInPrimaryKey(const SortDescription & description, const std::strin
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isInPartitionKey(const std::string & column_name, const Names & partition_key_columns)
|
||||||
|
{
|
||||||
|
auto is_in_partition_key = std::find(partition_key_columns.begin(), partition_key_columns.end(), column_name);
|
||||||
|
return is_in_partition_key != partition_key_columns.end();
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if merge result is not empty
|
/// Returns true if merge result is not empty
|
||||||
static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc,
|
static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc,
|
||||||
Row & row, const ColumnRawPtrs & raw_columns, size_t row_number)
|
Row & row, const ColumnRawPtrs & raw_columns, size_t row_number)
|
||||||
@ -181,7 +187,8 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc,
|
|||||||
static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
||||||
const Block & header,
|
const Block & header,
|
||||||
const SortDescription & description,
|
const SortDescription & description,
|
||||||
const Names & column_names_to_sum)
|
const Names & column_names_to_sum,
|
||||||
|
const Names & partition_key_columns)
|
||||||
{
|
{
|
||||||
size_t num_columns = header.columns();
|
size_t num_columns = header.columns();
|
||||||
SummingSortedAlgorithm::ColumnsDefinition def;
|
SummingSortedAlgorithm::ColumnsDefinition def;
|
||||||
@ -223,8 +230,8 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Are they inside the PK?
|
/// Are they inside the primary key or partiton key?
|
||||||
if (isInPrimaryKey(description, column.name, i))
|
if (isInPrimaryKey(description, column.name, i) || isInPartitionKey(column.name, partition_key_columns))
|
||||||
{
|
{
|
||||||
def.column_numbers_not_to_aggregate.push_back(i);
|
def.column_numbers_not_to_aggregate.push_back(i);
|
||||||
continue;
|
continue;
|
||||||
@ -617,9 +624,10 @@ SummingSortedAlgorithm::SummingSortedAlgorithm(
|
|||||||
const Block & header, size_t num_inputs,
|
const Block & header, size_t num_inputs,
|
||||||
SortDescription description_,
|
SortDescription description_,
|
||||||
const Names & column_names_to_sum,
|
const Names & column_names_to_sum,
|
||||||
|
const Names & partition_key_columns,
|
||||||
size_t max_block_size)
|
size_t max_block_size)
|
||||||
: IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_))
|
: IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_))
|
||||||
, columns_definition(defineColumns(header, description, column_names_to_sum))
|
, columns_definition(defineColumns(header, description, column_names_to_sum, partition_key_columns))
|
||||||
, merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition)
|
, merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,8 @@ public:
|
|||||||
SortDescription description_,
|
SortDescription description_,
|
||||||
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
|
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
|
||||||
const Names & column_names_to_sum,
|
const Names & column_names_to_sum,
|
||||||
|
/// List of partition key columns. They have to be excluded.
|
||||||
|
const Names & partition_key_columns,
|
||||||
size_t max_block_size);
|
size_t max_block_size);
|
||||||
|
|
||||||
void initialize(Inputs inputs) override;
|
void initialize(Inputs inputs) override;
|
||||||
|
@ -16,6 +16,7 @@ public:
|
|||||||
SortDescription description_,
|
SortDescription description_,
|
||||||
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
|
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
|
||||||
const Names & column_names_to_sum,
|
const Names & column_names_to_sum,
|
||||||
|
const Names & partition_key_columns,
|
||||||
size_t max_block_size)
|
size_t max_block_size)
|
||||||
: IMergingTransform(
|
: IMergingTransform(
|
||||||
num_inputs, header, header, true,
|
num_inputs, header, header, true,
|
||||||
@ -23,6 +24,7 @@ public:
|
|||||||
num_inputs,
|
num_inputs,
|
||||||
std::move(description_),
|
std::move(description_),
|
||||||
column_names_to_sum,
|
column_names_to_sum,
|
||||||
|
partition_key_columns,
|
||||||
max_block_size)
|
max_block_size)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -169,7 +169,7 @@ MergeTreeData::MergeTreeData(
|
|||||||
const auto settings = getSettings();
|
const auto settings = getSettings();
|
||||||
|
|
||||||
/// NOTE: using the same columns list as is read when performing actual merges.
|
/// NOTE: using the same columns list as is read when performing actual merges.
|
||||||
merging_params.check(metadata_.getColumns().getAllPhysical());
|
merging_params.check(metadata_);
|
||||||
|
|
||||||
if (metadata_.sampling_key.definition_ast != nullptr)
|
if (metadata_.sampling_key.definition_ast != nullptr)
|
||||||
{
|
{
|
||||||
@ -521,8 +521,10 @@ void MergeTreeData::checkStoragePolicy(const StoragePolicyPtr & new_storage_poli
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) const
|
void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadata) const
|
||||||
{
|
{
|
||||||
|
const auto columns = metadata.getColumns().getAllPhysical();
|
||||||
|
|
||||||
if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing)
|
if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing)
|
||||||
throw Exception("Sign column for MergeTree cannot be specified in modes except Collapsing or VersionedCollapsing.",
|
throw Exception("Sign column for MergeTree cannot be specified in modes except Collapsing or VersionedCollapsing.",
|
||||||
ErrorCodes::LOGICAL_ERROR);
|
ErrorCodes::LOGICAL_ERROR);
|
||||||
@ -607,6 +609,21 @@ void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) cons
|
|||||||
throw Exception(
|
throw Exception(
|
||||||
"Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
"Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check that summing columns are not in partition key.
|
||||||
|
if (metadata.isPartitionKeyDefined())
|
||||||
|
{
|
||||||
|
auto partition_key_columns = metadata.getPartitionKey().column_names;
|
||||||
|
|
||||||
|
Names names_intersection;
|
||||||
|
std::set_intersection(columns_to_sum.begin(), columns_to_sum.end(),
|
||||||
|
partition_key_columns.begin(), partition_key_columns.end(),
|
||||||
|
std::back_inserter(names_intersection));
|
||||||
|
|
||||||
|
if (!names_intersection.empty())
|
||||||
|
throw Exception("Colums: " + Nested::createCommaSeparatedStringFrom(names_intersection) +
|
||||||
|
" listed both in colums to sum and in partition key. That is not allowed.", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mode == MergingParams::Replacing)
|
if (mode == MergingParams::Replacing)
|
||||||
@ -638,7 +655,6 @@ String MergeTreeData::MergingParams::getModeName() const
|
|||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Int64 MergeTreeData::getMaxBlockNumber() const
|
Int64 MergeTreeData::getMaxBlockNumber() const
|
||||||
{
|
{
|
||||||
auto lock = lockParts();
|
auto lock = lockParts();
|
||||||
|
@ -301,7 +301,7 @@ public:
|
|||||||
Graphite::Params graphite_params;
|
Graphite::Params graphite_params;
|
||||||
|
|
||||||
/// Check that needed columns are present and have correct types.
|
/// Check that needed columns are present and have correct types.
|
||||||
void check(const NamesAndTypesList & columns) const;
|
void check(const StorageInMemoryMetadata & metadata) const;
|
||||||
|
|
||||||
String getModeName() const;
|
String getModeName() const;
|
||||||
};
|
};
|
||||||
|
@ -739,6 +739,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
|||||||
size_t sort_columns_size = sort_columns.size();
|
size_t sort_columns_size = sort_columns.size();
|
||||||
sort_description.reserve(sort_columns_size);
|
sort_description.reserve(sort_columns_size);
|
||||||
|
|
||||||
|
Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names;
|
||||||
|
|
||||||
Block header = pipes.at(0).getHeader();
|
Block header = pipes.at(0).getHeader();
|
||||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||||
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
||||||
@ -767,7 +769,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
|||||||
|
|
||||||
case MergeTreeData::MergingParams::Summing:
|
case MergeTreeData::MergingParams::Summing:
|
||||||
merged_transform = std::make_unique<SummingSortedTransform>(
|
merged_transform = std::make_unique<SummingSortedTransform>(
|
||||||
header, pipes.size(), sort_description, data.merging_params.columns_to_sum, merge_block_size);
|
header, pipes.size(), sort_description, data.merging_params.columns_to_sum, partition_key_columns, merge_block_size);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case MergeTreeData::MergingParams::Aggregating:
|
case MergeTreeData::MergingParams::Aggregating:
|
||||||
|
@ -1160,6 +1160,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
|||||||
size_t sort_columns_size = sort_columns.size();
|
size_t sort_columns_size = sort_columns.size();
|
||||||
sort_description.reserve(sort_columns_size);
|
sort_description.reserve(sort_columns_size);
|
||||||
|
|
||||||
|
Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names;
|
||||||
|
|
||||||
Block header = pipes.at(0).getHeader();
|
Block header = pipes.at(0).getHeader();
|
||||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||||
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
||||||
@ -1180,7 +1182,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
|
|||||||
|
|
||||||
case MergeTreeData::MergingParams::Summing:
|
case MergeTreeData::MergingParams::Summing:
|
||||||
return std::make_shared<SummingSortedTransform>(header, pipes.size(),
|
return std::make_shared<SummingSortedTransform>(header, pipes.size(),
|
||||||
sort_description, data.merging_params.columns_to_sum, max_block_size);
|
sort_description, data.merging_params.columns_to_sum, partition_key_columns, max_block_size);
|
||||||
|
|
||||||
case MergeTreeData::MergingParams::Aggregating:
|
case MergeTreeData::MergingParams::Aggregating:
|
||||||
return std::make_shared<AggregatingSortedTransform>(header, pipes.size(),
|
return std::make_shared<AggregatingSortedTransform>(header, pipes.size(),
|
||||||
|
@ -0,0 +1,20 @@
|
|||||||
|
---
|
||||||
|
1000000
|
||||||
|
---
|
||||||
|
17
|
||||||
|
---
|
||||||
|
0 17
|
||||||
|
---
|
||||||
|
0 17
|
||||||
|
1 17
|
||||||
|
2 17
|
||||||
|
3 17
|
||||||
|
4 17
|
||||||
|
5 17
|
||||||
|
6 17
|
||||||
|
7 17
|
||||||
|
8 17
|
||||||
|
9 17
|
||||||
|
10 17
|
||||||
|
11 17
|
||||||
|
12 17
|
@ -0,0 +1,26 @@
|
|||||||
|
DROP TABLE IF EXISTS tt_01373;
|
||||||
|
|
||||||
|
CREATE TABLE tt_01373
|
||||||
|
(a Int64, d Int64, val Int64)
|
||||||
|
ENGINE = SummingMergeTree PARTITION BY (a) ORDER BY (d);
|
||||||
|
|
||||||
|
SYSTEM STOP MERGES;
|
||||||
|
|
||||||
|
INSERT INTO tt_01373 SELECT number%13, number%17, 1 from numbers(1000000);
|
||||||
|
|
||||||
|
SELECT '---';
|
||||||
|
SELECT count(*) FROM tt_01373;
|
||||||
|
|
||||||
|
SELECT '---';
|
||||||
|
SELECT count(*) FROM tt_01373 FINAL;
|
||||||
|
|
||||||
|
SELECT '---';
|
||||||
|
SELECT a, count() FROM tt_01373 FINAL GROUP BY a ORDER BY a;
|
||||||
|
|
||||||
|
SYSTEM START MERGES;
|
||||||
|
|
||||||
|
OPTIMIZE TABLE tt_01373 FINAL;
|
||||||
|
SELECT '---';
|
||||||
|
SELECT a, count() FROM tt_01373 GROUP BY a ORDER BY a;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS tt_01373;
|
@ -0,0 +1,7 @@
|
|||||||
|
DROP TABLE IF EXISTS tt_error_1373;
|
||||||
|
|
||||||
|
CREATE TABLE tt_error_1373
|
||||||
|
( a Int64, d Int64, val Int64 )
|
||||||
|
ENGINE = SummingMergeTree((a, val)) PARTITION BY (a) ORDER BY (d); -- { serverError 36 }
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS tt_error_1373;
|
Loading…
Reference in New Issue
Block a user